From e620ab7ed30ddaab44537ba2de9f5c803a199726 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 2 Apr 2022 08:50:58 +1100 Subject: [PATCH 001/215] added new profiling module task granularity profiling is done for each task execution and the required information about the task executed is written to the trace. The execution time is not explicitly calculated as it can be found from each event begin and end in the trace. --- .../task_granularity/ValidateModule.CMake | 8 + .../task_granularity/pins_task_granularity.h | 25 +++ .../pins_task_granularity_component.c | 74 +++++++ .../pins_task_granularity_module.c | 189 ++++++++++++++++++ 4 files changed, 296 insertions(+) create mode 100644 parsec/mca/pins/task_granularity/ValidateModule.CMake create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity.h create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_component.c create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_module.c diff --git a/parsec/mca/pins/task_granularity/ValidateModule.CMake b/parsec/mca/pins/task_granularity/ValidateModule.CMake new file mode 100644 index 000000000..9e4a2787f --- /dev/null +++ b/parsec/mca/pins/task_granularity/ValidateModule.CMake @@ -0,0 +1,8 @@ +if (PARSEC_PROF_PINS) + SET(MCA_${COMPONENT}_${MODULE} ON) + FILE(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + SET(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else (PARSEC_PROF_PINS) + MESSAGE(STATUS "Module ${MODULE} not selectable: PINS disabled.") + SET(MCA_${COMPONENT}_${MODULE} OFF) +endif (PARSEC_PROF_PINS) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity.h b/parsec/mca/pins/task_granularity/pins_task_granularity.h new file mode 100644 index 000000000..2e05d8f1d --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity.h @@ -0,0 +1,25 @@ +#ifndef PINS_task_granularity_H +#define PINS_task_granularity_H + 
+#include "parsec/parsec_config.h" +#include "parsec/runtime.h" +#include "parsec/mca/mca.h" +#include "parsec/mca/pins/pins.h" + + +#define NUM_SELECT_EVENTS 2 +#define SYSTEM_QUEUE_VP -2 + +BEGIN_C_DECLS + +/** + * Globally exported variable + */ +PARSEC_DECLSPEC extern const parsec_pins_base_component_t parsec_pins_task_granularity_component; +PARSEC_DECLSPEC extern const parsec_pins_module_t parsec_pins_task_granularity_module; +/* static accessor */ +mca_base_component_t * pins_task_granularity_static_component(void); + +END_C_DECLS + +#endif diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_component.c b/parsec/mca/pins/task_granularity/pins_task_granularity_component.c new file mode 100644 index 000000000..665ebb876 --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_component.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "parsec/parsec_config.h" +#include "parsec/runtime.h" + +#include "parsec/mca/pins/pins.h" +#include "parsec/mca/pins/task_granularity/pins_task_granularity.h" + +/* + * Local function + */ +static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +const parsec_pins_base_component_t parsec_pins_task_granularity_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + PARSEC_PINS_BASE_VERSION_2_0_0, + + /* Component name and version */ + "task_granularity", + "", /* options */ + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + /* Component open and close functions */ + NULL, + NULL, + pins_task_granularity_component_query, + /*< specific query to return the module and add it to the list of available modules */ + NULL, + "", /*< no reserve */ + }, + { + /* The component has no metadata */ + MCA_BASE_METADATA_PARAM_NONE, + "", /*< no reserve */ + } +}; +mca_base_component_t * pins_task_granularity_static_component(void) +{ + return (mca_base_component_t *)&parsec_pins_task_granularity_component; +} + +static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority) +{ + /* module type should be: const mca_base_module_t ** */ + void *ptr = (void*)&parsec_pins_task_granularity_module; + *priority = 6; + *module = (mca_base_module_t *)ptr; + return MCA_SUCCESS; +} + diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c new file mode 100644 index 000000000..73680d3b6 --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ */ + +#include +#include + +#include "parsec/parsec_config.h" +#include "pins_task_granularity.h" +#include "parsec/mca/pins/pins.h" +#include "parsec/utils/debug.h" +#include "parsec/execution_stream.h" +#include "parsec/profiling.h" +#include "parsec/parsec_internal.h" +#include "parsec/os-spec-timing.h" + +static void pins_init_task_granularity(parsec_context_t* master_context); +static void pins_fini_task_granularity(parsec_context_t* master_context); +static void pins_thread_init_task_granularity(parsec_execution_stream_t* es); +static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es); + +static FILE *file_ptr; +parsec_atomic_lock_t lock; + +int task_granularity_trace_keyin; +int task_granularity_trace_keyout; + +typedef struct task_characteristics_s +{ + int taskpool_id; + int task_class_id; + int nb_data_items; + int total_data_size; + int priority; + int chore_id; +} task_characteristics_t; + +const parsec_pins_module_t parsec_pins_task_granularity_module = { + &parsec_pins_task_granularity_component, + { + pins_init_task_granularity, + pins_fini_task_granularity, + NULL, + NULL, + pins_thread_init_task_granularity, + pins_thread_fini_task_granularity + }, + { NULL } +}; + + +static void start_task_granularity_record(parsec_execution_stream_t* es, + parsec_task_t* task, + parsec_pins_next_callback_t* data); + +static void stop_task_granularity_record(parsec_execution_stream_t* es, + parsec_task_t* task, + parsec_pins_next_callback_t* data); + + +static void pins_init_task_granularity(parsec_context_t* master) +{ + (void)master; + parsec_profiling_add_dictionary_keyword("TASK_GRANULARITY", "fill:#FF0000", + sizeof(task_characteristics_t), + "taskpool_id{int32_t};task_class_id{int32_t};nb_data_items{int32_t};total_data_size{int32_t};priority{int32_t};chore_id{int32_t}", + &task_granularity_trace_keyin, + &task_granularity_trace_keyout); + +} + +static void pins_fini_task_granularity(parsec_context_t* master) +{ + (void)master; +} + 
+/**
+ * Per-thread initialisation: registers start_task_granularity_record() on
+ * EXEC_BEGIN and stop_task_granularity_record() on EXEC_END for the execution
+ * stream @p es. Each registration uses its own heap-allocated callback
+ * descriptor; pins_thread_fini_task_granularity() releases them.
+ */
+static void pins_thread_init_task_granularity(parsec_execution_stream_t* es)
+{
+    parsec_pins_next_callback_t* begin_cb = malloc(sizeof *begin_cb);
+    parsec_pins_next_callback_t* end_cb = malloc(sizeof *end_cb);
+
+    if( NULL == begin_cb || NULL == end_cb ) {
+        /* Never hand a NULL descriptor to the registration macro: give up on
+         * profiling this stream instead of registering only half of the pair. */
+        fprintf(stderr, "task_granularity: out of memory, stream is not profiled\n");
+        free(begin_cb);
+        free(end_cb);
+        return;
+    }
+
+    PARSEC_PINS_REGISTER(es, EXEC_BEGIN, start_task_granularity_record, begin_cb);
+    PARSEC_PINS_REGISTER(es, EXEC_END, stop_task_granularity_record, end_cb);
+}
+
+/**
+ * Per-thread finalisation: unregisters both callbacks from @p es and frees the
+ * descriptors handed back by the unregistration.
+ */
+static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es)
+{
+    /* Start from NULL so that free() is a no-op should the unregistration not
+     * write a descriptor back (e.g. nothing was registered on this stream).
+     * NOTE(review): assumes PARSEC_PINS_UNREGISTER leaves the pointer untouched
+     * in that case - confirm against the PINS implementation. */
+    parsec_pins_next_callback_t* event_cb = NULL;
+
+    PARSEC_PINS_UNREGISTER(es, EXEC_BEGIN, start_task_granularity_record, &event_cb);
+    free(event_cb);
+    event_cb = NULL;
+    PARSEC_PINS_UNREGISTER(es, EXEC_END, stop_task_granularity_record, &event_cb);
+    free(event_cb);
+}
+
+/**
+ * EXEC_BEGIN callback: emits the opening TASK_GRANULARITY event for @p task.
+ * The task description itself is only filled in by the closing event, see
+ * stop_task_granularity_record().
+ */
+static void start_task_granularity_record(parsec_execution_stream_t* es,
+                                          struct parsec_task_s* task,
+                                          parsec_pins_next_callback_t* data)
+{
+    /* The payload buffer is handed to the trace, so it must not be left
+     * uninitialised: an all-zero record is written for the begin event. */
+    task_characteristics_t characteristics = {0};
+
+    (void)data;
+
+    PARSEC_PROFILING_TRACE(es->es_profile,
+                           task_granularity_trace_keyin,
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           (void*)&characteristics);
+}
+
+/**
+ * Returns the index of the first incarnation of @p task that is enabled in the
+ * task's chore mask and whose evaluate hook (if any) returns
+ * PARSEC_HOOK_RETURN_DONE, or PARSEC_HOOK_RETURN_ERROR when there is none.
+ * The search works on a local copy of the mask; task->chore_mask is not modified.
+ *
+ * NOTE(review): the three "1<<chore_id" expressions, the "do {" opener and the
+ * loop headers around them were reconstructed; the text between '<' and '>'
+ * had been lost from this listing. Compare with the committed file.
+ * NOTE(review): this calls the evaluate hooks again from a profiling callback;
+ * confirm that they are free of side effects.
+ */
+int find_chore(parsec_execution_stream_t* es, parsec_task_t* task)
+{
+    const parsec_task_class_t* tc = task->task_class;
+    uint8_t chore_mask = task->chore_mask;
+    parsec_evaluate_function_t* eval;
+    unsigned int chore_id;
+    int rc;
+
+    (void)es;
+
+    /* Find first bit in chore_mask that is not 0 */
+    for(chore_id = 0; NULL != tc->incarnations[chore_id].hook; chore_id++)
+        if( 0 != (chore_mask & (1<<chore_id)) )
+            break;
+
+    /* No enabled incarnation at all: do not inspect the terminating entry */
+    if( NULL == tc->incarnations[chore_id].hook )
+        return PARSEC_HOOK_RETURN_ERROR;
+
+    do {
+        if( NULL != (eval = tc->incarnations[chore_id].evaluate) ) {
+            rc = eval(task);
+            if( PARSEC_HOOK_RETURN_DONE != rc ) {
+                if( PARSEC_HOOK_RETURN_NEXT != rc ) {
+                    break;
+                }
+                goto next_chore;
+            }
+        }
+
+        return chore_id;
+
+    next_chore:
+        /* Mark this chore as tested */
+        chore_mask &= ~( 1<<chore_id );
+        for(chore_id = chore_id+1; NULL != tc->incarnations[chore_id].hook; chore_id++)
+            if( 0 != (chore_mask & (1<<chore_id)) )
+                break;
+    } while(NULL != tc->incarnations[chore_id].hook);
+
+    return PARSEC_HOOK_RETURN_ERROR;
+}
+
+int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task)
+{
+    int i, total_data = 0, nb_elements = 0, size = 0;
+    int nb_param = task->task_class->nb_parameters;
+
+    for(i = 0; i < nb_param; i++)
+    {
+        nb_elements = task->data[i].data_in->arena_chunk->count;
+        size = task->data[i].data_in->arena_chunk->origin->elem_size;
+        total_data += nb_elements * size;
+    }
+
+    return total_data;
+
+}
+
+static void stop_task_granularity_record(parsec_execution_stream_t* es,
+                                         parsec_task_t* task,
+                                         parsec_pins_next_callback_t* data)
+{
+    /* EXEC_END callback: describes the task that just ran and attaches that
+     * description to the closing TASK_GRANULARITY event. Every field of the
+     * record is assigned below before it is handed to the trace. */
+    task_characteristics_t characteristics;
+
+    (void)data;
+
+    characteristics.taskpool_id = task->taskpool->taskpool_id;
+    characteristics.task_class_id = task->task_class->task_class_id;
+    characteristics.nb_data_items = task->task_class->nb_parameters;
+    characteristics.total_data_size = find_data_size(es, task);
+    characteristics.priority = task->priority;
+    characteristics.chore_id = find_chore(es, task);
+
+    PARSEC_PROFILING_TRACE(es->es_profile,
+                           task_granularity_trace_keyout,
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           (void*)&characteristics);
+
+}
+
+

From 5a393a5b10b0e83e7ea5172885bcb9737cb1065a Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 2 Apr 2022 08:50:58 +1100 Subject: [PATCH 002/215] added new profiling module task granularity profiling is done for each task execution and the required information about the task executed is written to the trace. The execution time is not explicitly calculated as it can be found from each event begin and end in the trace. 
--- .../task_granularity/ValidateModule.CMake | 8 + .../task_granularity/pins_task_granularity.h | 25 +++ .../pins_task_granularity_component.c | 74 +++++++ .../pins_task_granularity_module.c | 189 ++++++++++++++++++ 4 files changed, 296 insertions(+) create mode 100644 parsec/mca/pins/task_granularity/ValidateModule.CMake create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity.h create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_component.c create mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_module.c diff --git a/parsec/mca/pins/task_granularity/ValidateModule.CMake b/parsec/mca/pins/task_granularity/ValidateModule.CMake new file mode 100644 index 000000000..9e4a2787f --- /dev/null +++ b/parsec/mca/pins/task_granularity/ValidateModule.CMake @@ -0,0 +1,8 @@ +if (PARSEC_PROF_PINS) + SET(MCA_${COMPONENT}_${MODULE} ON) + FILE(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) + SET(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") +else (PARSEC_PROF_PINS) + MESSAGE(STATUS "Module ${MODULE} not selectable: PINS disabled.") + SET(MCA_${COMPONENT}_${MODULE} OFF) +endif (PARSEC_PROF_PINS) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity.h b/parsec/mca/pins/task_granularity/pins_task_granularity.h new file mode 100644 index 000000000..2e05d8f1d --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity.h @@ -0,0 +1,25 @@ +#ifndef PINS_task_granularity_H +#define PINS_task_granularity_H + +#include "parsec/parsec_config.h" +#include "parsec/runtime.h" +#include "parsec/mca/mca.h" +#include "parsec/mca/pins/pins.h" + + +#define NUM_SELECT_EVENTS 2 +#define SYSTEM_QUEUE_VP -2 + +BEGIN_C_DECLS + +/** + * Globally exported variable + */ +PARSEC_DECLSPEC extern const parsec_pins_base_component_t parsec_pins_task_granularity_component; +PARSEC_DECLSPEC extern const parsec_pins_module_t 
parsec_pins_task_granularity_module; +/* static accessor */ +mca_base_component_t * pins_task_granularity_static_component(void); + +END_C_DECLS + +#endif diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_component.c b/parsec/mca/pins/task_granularity/pins_task_granularity_component.c new file mode 100644 index 000000000..665ebb876 --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_component.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "parsec/parsec_config.h" +#include "parsec/runtime.h" + +#include "parsec/mca/pins/pins.h" +#include "parsec/mca/pins/task_granularity/pins_task_granularity.h" + +/* + * Local function + */ +static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +const parsec_pins_base_component_t parsec_pins_task_granularity_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + PARSEC_PINS_BASE_VERSION_2_0_0, + + /* Component name and version */ + "task_granularity", + "", /* options */ + PARSEC_VERSION_MAJOR, + PARSEC_VERSION_MINOR, + + /* Component open and close functions */ + NULL, + NULL, + pins_task_granularity_component_query, + /*< specific query to return the module and add it to the list of available modules */ + NULL, + "", /*< no reserve */ + }, + { + /* The component has no metadata */ + MCA_BASE_METADATA_PARAM_NONE, + "", /*< no reserve */ + } +}; +mca_base_component_t * pins_task_granularity_static_component(void) +{ + return (mca_base_component_t *)&parsec_pins_task_granularity_component; +} + +static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority) +{ + /* module type should be: const mca_base_module_t ** */ + void *ptr = (void*)&parsec_pins_task_granularity_module; + *priority = 6; + *module = (mca_base_module_t *)ptr; + return MCA_SUCCESS; +} + diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c new file mode 100644 index 000000000..73680d3b6 --- /dev/null +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ */ + +#include +#include + +#include "parsec/parsec_config.h" +#include "pins_task_granularity.h" +#include "parsec/mca/pins/pins.h" +#include "parsec/utils/debug.h" +#include "parsec/execution_stream.h" +#include "parsec/profiling.h" +#include "parsec/parsec_internal.h" +#include "parsec/os-spec-timing.h" + +static void pins_init_task_granularity(parsec_context_t* master_context); +static void pins_fini_task_granularity(parsec_context_t* master_context); +static void pins_thread_init_task_granularity(parsec_execution_stream_t* es); +static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es); + +static FILE *file_ptr; +parsec_atomic_lock_t lock; + +int task_granularity_trace_keyin; +int task_granularity_trace_keyout; + +typedef struct task_characteristics_s +{ + int taskpool_id; + int task_class_id; + int nb_data_items; + int total_data_size; + int priority; + int chore_id; +} task_characteristics_t; + +const parsec_pins_module_t parsec_pins_task_granularity_module = { + &parsec_pins_task_granularity_component, + { + pins_init_task_granularity, + pins_fini_task_granularity, + NULL, + NULL, + pins_thread_init_task_granularity, + pins_thread_fini_task_granularity + }, + { NULL } +}; + + +static void start_task_granularity_record(parsec_execution_stream_t* es, + parsec_task_t* task, + parsec_pins_next_callback_t* data); + +static void stop_task_granularity_record(parsec_execution_stream_t* es, + parsec_task_t* task, + parsec_pins_next_callback_t* data); + + +static void pins_init_task_granularity(parsec_context_t* master) +{ + (void)master; + parsec_profiling_add_dictionary_keyword("TASK_GRANULARITY", "fill:#FF0000", + sizeof(task_characteristics_t), + "taskpool_id{int32_t};task_class_id{int32_t};nb_data_items{int32_t};total_data_size{int32_t};priority{int32_t};chore_id{int32_t}", + &task_granularity_trace_keyin, + &task_granularity_trace_keyout); + +} + +static void pins_fini_task_granularity(parsec_context_t* master) +{ + (void)master; +} + 
+/**
+ * Per-thread initialisation: registers start_task_granularity_record() on
+ * EXEC_BEGIN and stop_task_granularity_record() on EXEC_END for the execution
+ * stream @p es. Each registration uses its own heap-allocated callback
+ * descriptor; pins_thread_fini_task_granularity() releases them.
+ */
+static void pins_thread_init_task_granularity(parsec_execution_stream_t* es)
+{
+    parsec_pins_next_callback_t* begin_cb = malloc(sizeof *begin_cb);
+    parsec_pins_next_callback_t* end_cb = malloc(sizeof *end_cb);
+
+    if( NULL == begin_cb || NULL == end_cb ) {
+        /* Never hand a NULL descriptor to the registration macro: give up on
+         * profiling this stream instead of registering only half of the pair. */
+        fprintf(stderr, "task_granularity: out of memory, stream is not profiled\n");
+        free(begin_cb);
+        free(end_cb);
+        return;
+    }
+
+    PARSEC_PINS_REGISTER(es, EXEC_BEGIN, start_task_granularity_record, begin_cb);
+    PARSEC_PINS_REGISTER(es, EXEC_END, stop_task_granularity_record, end_cb);
+}
+
+/**
+ * Per-thread finalisation: unregisters both callbacks from @p es and frees the
+ * descriptors handed back by the unregistration.
+ */
+static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es)
+{
+    /* Start from NULL so that free() is a no-op should the unregistration not
+     * write a descriptor back (e.g. nothing was registered on this stream).
+     * NOTE(review): assumes PARSEC_PINS_UNREGISTER leaves the pointer untouched
+     * in that case - confirm against the PINS implementation. */
+    parsec_pins_next_callback_t* event_cb = NULL;
+
+    PARSEC_PINS_UNREGISTER(es, EXEC_BEGIN, start_task_granularity_record, &event_cb);
+    free(event_cb);
+    event_cb = NULL;
+    PARSEC_PINS_UNREGISTER(es, EXEC_END, stop_task_granularity_record, &event_cb);
+    free(event_cb);
+}
+
+/**
+ * EXEC_BEGIN callback: emits the opening TASK_GRANULARITY event for @p task.
+ * The task description itself is only filled in by the closing event, see
+ * stop_task_granularity_record().
+ */
+static void start_task_granularity_record(parsec_execution_stream_t* es,
+                                          struct parsec_task_s* task,
+                                          parsec_pins_next_callback_t* data)
+{
+    /* The payload buffer is handed to the trace, so it must not be left
+     * uninitialised: an all-zero record is written for the begin event. */
+    task_characteristics_t characteristics = {0};
+
+    (void)data;
+
+    PARSEC_PROFILING_TRACE(es->es_profile,
+                           task_granularity_trace_keyin,
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           (void*)&characteristics);
+}
+
+/**
+ * Returns the index of the first incarnation of @p task that is enabled in the
+ * task's chore mask and whose evaluate hook (if any) returns
+ * PARSEC_HOOK_RETURN_DONE, or PARSEC_HOOK_RETURN_ERROR when there is none.
+ * The search works on a local copy of the mask; task->chore_mask is not modified.
+ *
+ * NOTE(review): the three "1<<chore_id" expressions, the "do {" opener and the
+ * loop headers around them were reconstructed; the text between '<' and '>'
+ * had been lost from this listing. Compare with the committed file.
+ * NOTE(review): this calls the evaluate hooks again from a profiling callback;
+ * confirm that they are free of side effects.
+ */
+int find_chore(parsec_execution_stream_t* es, parsec_task_t* task)
+{
+    const parsec_task_class_t* tc = task->task_class;
+    uint8_t chore_mask = task->chore_mask;
+    parsec_evaluate_function_t* eval;
+    unsigned int chore_id;
+    int rc;
+
+    (void)es;
+
+    /* Find first bit in chore_mask that is not 0 */
+    for(chore_id = 0; NULL != tc->incarnations[chore_id].hook; chore_id++)
+        if( 0 != (chore_mask & (1<<chore_id)) )
+            break;
+
+    /* No enabled incarnation at all: do not inspect the terminating entry */
+    if( NULL == tc->incarnations[chore_id].hook )
+        return PARSEC_HOOK_RETURN_ERROR;
+
+    do {
+        if( NULL != (eval = tc->incarnations[chore_id].evaluate) ) {
+            rc = eval(task);
+            if( PARSEC_HOOK_RETURN_DONE != rc ) {
+                if( PARSEC_HOOK_RETURN_NEXT != rc ) {
+                    break;
+                }
+                goto next_chore;
+            }
+        }
+
+        return chore_id;
+
+    next_chore:
+        /* Mark this chore as tested */
+        chore_mask &= ~( 1<<chore_id );
+        for(chore_id = chore_id+1; NULL != tc->incarnations[chore_id].hook; chore_id++)
+            if( 0 != (chore_mask & (1<<chore_id)) )
+                break;
+    } while(NULL != tc->incarnations[chore_id].hook);
+
+    return PARSEC_HOOK_RETURN_ERROR;
+}
+
+int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task)
+{
+    int i, total_data = 0, nb_elements = 0, size = 0;
+    int nb_param = task->task_class->nb_parameters;
+
+    for(i = 0; i < nb_param; i++)
+    {
+        nb_elements = task->data[i].data_in->arena_chunk->count;
+        size = task->data[i].data_in->arena_chunk->origin->elem_size;
+        total_data += nb_elements * size;
+    }
+
+    return total_data;
+
+}
+
+static void stop_task_granularity_record(parsec_execution_stream_t* es,
+                                         parsec_task_t* task,
+                                         parsec_pins_next_callback_t* data)
+{
+    /* EXEC_END callback: describes the task that just ran and attaches that
+     * description to the closing TASK_GRANULARITY event. Every field of the
+     * record is assigned below before it is handed to the trace. */
+    task_characteristics_t characteristics;
+
+    (void)data;
+
+    characteristics.taskpool_id = task->taskpool->taskpool_id;
+    characteristics.task_class_id = task->task_class->task_class_id;
+    characteristics.nb_data_items = task->task_class->nb_parameters;
+    characteristics.total_data_size = find_data_size(es, task);
+    characteristics.priority = task->priority;
+    characteristics.chore_id = find_chore(es, task);
+
+    PARSEC_PROFILING_TRACE(es->es_profile,
+                           task_granularity_trace_keyout,
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL),
+                           (void*)&characteristics);
+
+}
+
+

From e47190e4ab9e56f94c658b56285e694c2825e7a6 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 1 Apr 2022 03:18:57 +1100 Subject: [PATCH 003/215] task_characteristics_t structure updated --- parsec/mca/pins/task_granularity/pins_task_granularity_module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c index 73680d3b6..500f765c1 100644 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ 
b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -163,6 +163,8 @@ int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) return total_data; + #endif + } static void stop_task_granularity_record(parsec_execution_stream_t* es, From 566297f0e0de6d5ede57d9dac5d276e021a28bba Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 6 Apr 2022 06:07:47 +1000 Subject: [PATCH 004/215] find_data_size() updated to deal with corner cases --- .../pins_task_granularity_module.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c index 73680d3b6..ce2edbee2 100644 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -153,12 +153,25 @@ int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) { int i, total_data = 0, nb_elements = 0, size = 0; int nb_param = task->task_class->nb_parameters; + struct parsec_data_copy_s* data; for(i = 0; i < nb_param; i++) { - nb_elements = task->data[i].data_in->arena_chunk->count; - size = task->data[i].data_in->arena_chunk->origin->elem_size; - total_data += nb_elements * size; + data = task->data[i].data_in; + if(data == NULL) + data = task->data[i].data_out; + + if(data != NULL) + { + if(data->arena_chunk != NULL) + { + nb_elements = data->arena_chunk->count; + + if(size = data->arena_chunk->origin != NULL) + size = data->arena_chunk->origin->elem_size; + } + total_data += nb_elements * size; + } } return total_data; From b5764dba4f3da2ad7e84d003b3e61d3d3a55b45b Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Thu, 31 Mar 2022 17:08:47 -0400 Subject: [PATCH 005/215] Since new policy, setting the non-cache variable has no effect on the same-name cache variable and breaks findHWLOC from PaRSECConfig.cmake Signed-off-by: Aurelien Bouteiller --- 
cmake_modules/FindOTF2.cmake | 6 +++--- cmake_modules/PaRSECConfig.cmake.in | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cmake_modules/FindOTF2.cmake b/cmake_modules/FindOTF2.cmake index 7dd8ae5c3..e0aa9875e 100644 --- a/cmake_modules/FindOTF2.cmake +++ b/cmake_modules/FindOTF2.cmake @@ -63,9 +63,9 @@ ELSE(NOT OTF2_CONFIG) ${OTF2_LINK_DIRS} ) IF(_OTF2_LIB_FROM_ARG) - IF(_OTF2_LIBRARY_TEST) - SET(OTF2_LIBRARY ${_OTF2_LIB_FROM_ARG}) - ENDIF(_OTF2_LIBRARY_TEST) + IF(_OTF2_LIBRARY_TEST) + SET(OTF2_LIBRARY ${_OTF2_LIB_FROM_ARG}) + ENDIF(_OTF2_LIBRARY_TEST) SET(OTF2_LIBRARIES ${OTF2_LIBRARIES} ${_OTF2_LIB_FROM_ARG}) ENDIF(_OTF2_LIB_FROM_ARG) UNSET(_OTF2_LIB_FROM_ARG CACHE) diff --git a/cmake_modules/PaRSECConfig.cmake.in b/cmake_modules/PaRSECConfig.cmake.in index c6a2d7d69..429ee4a33 100644 --- a/cmake_modules/PaRSECConfig.cmake.in +++ b/cmake_modules/PaRSECConfig.cmake.in @@ -20,8 +20,10 @@ find_package(Threads) if(@PARSEC_HAVE_HWLOC@) set_and_check(HWLOC_INCLUDE_DIR "@HWLOC_INCLUDE_DIR@") + set(HWLOC_INCLUDE_DIR ${HWLOC_INCLUDE_DIR} CACHE PATH "Imported by PaRSECConfig.cmake" FORCE) set_and_check(HWLOC_LIBRARY "@HWLOC_LIBRARY@") - find_package(HWLOC REQUIRED) + set(HWLOC_LIBRARY ${HWLOC_LIBRARY} CACHE PATH "Imported by PaRSECConfig.cmake" FORCE) + find_package(HWLOC REQUIRED MODULE) endif(@PARSEC_HAVE_HWLOC@) if(@PARSEC_HAVE_OTF2@) @@ -40,8 +42,10 @@ endif(@PARSEC_HAVE_OTF2@) if(@PARSEC_HAVE_PAPI@) set_and_check(PAPI_INCLUDE_DIR "@PAPI_INCLUDE_DIR@") + set(PAPI_INCLUDE_DIR ${PAPI_INCLUDE_DIR} CACHE PATH "Imported by PaRSECConfig.cmake" FORCE) set_and_check(PAPI_LIBRARY "@PAPI_LIBRARY@") - find_package(PAPI REQUIRED) + set(PAPI_LIBRARY ${PAPI_LIBRARY} CACHE PATH "Imported by PaRSECConfig.cmake" FORCE) + find_package(PAPI REQUIRED MODULE) endif(@PARSEC_HAVE_PAPI@) if(@PARSEC_DIST_WITH_MPI@) From 341a2f390e1a851b3fde5c8a8dccef85e2af9acf Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Mon, 4 Apr 2022 17:11:48 -0400 Subject: [PATCH 
006/215] PRIVATE/PUBLIC_HEADER_H files may be in the build dir or the source dir, handle both cases --- parsec/CMakeLists.txt | 6 ++++++ parsec/data_dist/matrix/CMakeLists.txt | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index 6b2550e3a..6aca9788f 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -266,11 +266,17 @@ if( BUILD_PARSEC ) get_target_property(_public_headers parsec PUBLIC_HEADER_H) foreach(_FILE ${_public_headers}) get_filename_component(_DIR ${_FILE} DIRECTORY) + if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_FILE}") + set(_FILE "${CMAKE_CURRENT_BINARY_DIR}/${_FILE}") + endif() INSTALL(FILES ${_FILE} DESTINATION ${PARSEC_INSTALL_INCLUDEDIR}/parsec/${_DIR}) endforeach() get_target_property(_private_headers parsec PRIVATE_HEADER_H) foreach(_FILE ${_private_headers}) get_filename_component(_DIR ${_FILE} DIRECTORY) + if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${_FILE}") + set(_FILE "${CMAKE_CURRENT_BINARY_DIR}/${_FILE}") + endif() INSTALL(FILES ${_FILE} DESTINATION ${PARSEC_INSTALL_INCLUDEDIR}/parsec/${_DIR}) endforeach() diff --git a/parsec/data_dist/matrix/CMakeLists.txt b/parsec/data_dist/matrix/CMakeLists.txt index 91e8451dc..51beffec2 100644 --- a/parsec/data_dist/matrix/CMakeLists.txt +++ b/parsec/data_dist/matrix/CMakeLists.txt @@ -32,7 +32,7 @@ if( TARGET parsec-ptgpp ) target_ptg_sources(parsec PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/reduce_col.jdf;${CMAKE_CURRENT_SOURCE_DIR}/reduce_row.jdf;${CMAKE_CURRENT_SOURCE_DIR}/reduce.jdf;${CMAKE_CURRENT_SOURCE_DIR}/diag_band_to_rect.jdf;${CMAKE_CURRENT_SOURCE_DIR}/apply.jdf") set_property(TARGET parsec APPEND PROPERTY - PRIVATE_HEADER_H ${CMAKE_CURRENT_BINARY_DIR}/diag_band_to_rect.h) + PRIVATE_HEADER_H data_dist/matrix/diag_band_to_rect.h) endif( TARGET parsec-ptgpp ) target_sources(parsec PRIVATE ${sources}) From 5bf82331136fa931b7fad5969158c53bd48f8831 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 6 Apr 2022 10:11:04 
-0400 Subject: [PATCH 007/215] find_data_size() corrected find_data_size() was using number of parameters to find the total data the task was operating on. This was corrected, now we use number of flows to calculate the total data. --- .../pins_task_granularity_module.c | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c index ce2edbee2..5bf928f28 100644 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -149,35 +149,35 @@ int find_chore(parsec_execution_stream_t* es, parsec_task_t* task) return PARSEC_HOOK_RETURN_ERROR; } + int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) { int i, total_data = 0, nb_elements = 0, size = 0; - int nb_param = task->task_class->nb_parameters; - struct parsec_data_copy_s* data; + int nb_flows = task->task_class->nb_flows; + struct parsec_data_copy_s* task_data; - for(i = 0; i < nb_param; i++) + for(i = 0; i < nb_flows; i++) { - data = task->data[i].data_in; - if(data == NULL) - data = task->data[i].data_out; - - if(data != NULL) + task_data = task->data[i].data_in; + if(task_data == NULL) + task_data = task->data[i].data_out; + + if(task_data != NULL) { - if(data->arena_chunk != NULL) + if(task_data->arena_chunk != NULL) { - nb_elements = data->arena_chunk->count; - - if(size = data->arena_chunk->origin != NULL) - size = data->arena_chunk->origin->elem_size; + nb_elements = task_data->arena_chunk->count; + if(size = task_data->arena_chunk->origin != NULL) + size = task_data->arena_chunk->origin->elem_size; } total_data += nb_elements * size; } } - return total_data; - } + + static void stop_task_granularity_record(parsec_execution_stream_t* es, parsec_task_t* task, parsec_pins_next_callback_t* data) From 948c0fce68121e996685309efe34e44dba01301b Mon Sep 17 
00:00:00 2001 From: Joseph John Date: Wed, 13 Apr 2022 01:25:03 +1000 Subject: [PATCH 008/215] find_data_size() updated. First arena is queried for the task data size. If arena is NULL original is queried for the data size. --- .../task_granularity/pins_task_granularity_module.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c index 5bf928f28..e47478a97 100644 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -153,24 +153,27 @@ int find_chore(parsec_execution_stream_t* es, parsec_task_t* task) int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) { int i, total_data = 0, nb_elements = 0, size = 0; - int nb_flows = task->task_class->nb_flows; struct parsec_data_copy_s* task_data; - for(i = 0; i < nb_flows; i++) + for(i = 0; i < task->task_class->nb_flows; i++) { task_data = task->data[i].data_in; if(task_data == NULL) task_data = task->data[i].data_out; if(task_data != NULL) - { + { if(task_data->arena_chunk != NULL) { nb_elements = task_data->arena_chunk->count; - if(size = task_data->arena_chunk->origin != NULL) + if(task_data->arena_chunk->origin != NULL) size = task_data->arena_chunk->origin->elem_size; } - total_data += nb_elements * size; + else if(task_data->original != NULL) + total_data += task_data->original->nb_elts; + else + printf("SOMETHING IS WRONG Name %s Id %d \n", task->task_class->name, task->task_class->task_class_id); + } } return total_data; From ce9621a90895aaa57b74cd74f1a062c293ab752e Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 13 Apr 2022 06:04:20 +1000 Subject: [PATCH 009/215] find_data_size() simplified --- .../pins/task_granularity/pins_task_granularity_module.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git 
a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c index e47478a97..8d8034650 100644 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c @@ -163,12 +163,8 @@ int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) if(task_data != NULL) { - if(task_data->arena_chunk != NULL) - { - nb_elements = task_data->arena_chunk->count; - if(task_data->arena_chunk->origin != NULL) - size = task_data->arena_chunk->origin->elem_size; - } + if(task_data->arena_chunk != NULL && task_data->arena_chunk->origin != NULL) + total_data += task_data->arena_chunk->count * task_data->arena_chunk->origin->elem_size; else if(task_data->original != NULL) total_data += task_data->original->nb_elts; else From 0504d4124407ba1d97e1c07bbbff857bacba90e2 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Apr 2022 23:47:42 +1000 Subject: [PATCH 010/215] migration codes --- parsec/mca/device/cuda/device_cuda_migrate.c | 220 +++++++++++++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 36 +++ 2 files changed, 256 insertions(+) create mode 100644 parsec/mca/device/cuda/device_cuda_migrate.c create mode 100644 parsec/mca/device/cuda/device_cuda_migrate.h diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c new file mode 100644 index 000000000..cfcdf11da --- /dev/null +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -0,0 +1,220 @@ + +#include "parsec/mca/device/cuda/device_cuda_migrate.h" + +extern int parsec_device_cuda_enabled; +static parsec_device_cuda_info_t* device_info; + + +/** + * @brief The function initialises the data structures required + * for inter-device migration. 
+ * + * @param ndevices number of devices + * @return int + */ + +int parsec_cuda_migrate_init(int ndevices) +{ + int i; + cudaError_t cudastatus; + nvmlReturn_t nvml_ret; + + device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); + + for(i = 0; i < ndevices; i++) + { + device_info[i].task_count = 0; + device_info[i].load = 0; + } + + nvml_ret = nvmlInit_v2(); + + return 0; + +} + +int parsec_cuda_migrate_fini(int ndevices) +{ + free(device_info); + nvmlShutdown(); + + return 0; + +} + +/** + * @brief returns the load of a particular device + * + * nvml_utilization has two fields - gpu and memory + * gpu - Percent of time over the past sample period during which one or more kernels was executing on the GPU. + * memory - Percent of time over the past sample period during which global (device) memory was being read or written + * + * @param device index of the device + * @return int + */ + +int parsec_cuda_get_device_load(int device) +{ + unsigned int nvml_dev; + nvmlDevice_t nvml_device; + nvmlUtilization_t nvml_utilization; + + nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); + nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); + device_info[device].load = nvml_utilization.gpu; + + printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); + + return device_info[device].load; + +} + +/** + * @brief returns the number of tasks in a particular device + * + * @param device index of the device + * @return int + */ + +int parsec_cuda_get_device_task(int device) +{ + return device_info[device].task_count; +} + + +/** + * @brief sets the load of a particular device + * + * @param device index of the device + * @return int + */ + +int parsec_cuda_set_device_load(int device, int load) +{ + device_info[device].load += load; + return device_info[device].load; +} + +/** + * @brief sets the number of tasks in a particular device + * + * @param device index of the device + 
* @return int + */ + +int parsec_cuda_set_device_task(int device, int task_count) +{ + device_info[device].task_count += task_count; + return device_info[device].task_count; +} + +/** + * @brief returns 1 if the device is starving, 0 if its is not + * + * @param device index number of the device + * @return int + * + * TODO: needs updation + */ +int is_starving(int device) +{ + if( device_info[device].load < 1 && device_info[device].task_count < 1 ) + return 1; + else + return 0; +} + +/** + * @brief returns the index of a starving device and returns -1 + * if no device is starving. + * + * @param dealer_device device probing for a starving device + * @param ndevice total number of devices + * @return int + * + * TODO: needs updation + */ +int find_starving_device(int dealer_device, int ndevice) +{ + int i; + printf(" find_starving_device: Total_Dev %d Dealer_Dev %d\n", ndevice, dealer_device); + + // 0 device is the CPU, 1 is recursive + for(i = 2; i < (2 + ndevice); i++) + { + printf("Trying_Dev %d Dealer_Dev %d\n", i, dealer_device); + if( i == dealer_device) + continue; + + //if(is_starving(i)) + return i; + } + + return -1; +} + + +/** + * @brief selects a new starving device instead of the originally + * intended device. This enables migration of a task before it + * is scheduled to any particular device. 
+ * + * @param dealer_device_index the device the task was initially assigned to + * @return parsec_device_gpu_module_t* + * + */ +parsec_device_gpu_module_t* +parsec_cuda_change_device( int dealer_device_index) +{ + int starving_device_index; + parsec_device_gpu_module_t* starving_gpu_device; + + printf("parsec_cuda_change_device: Total_Dev %d Dealer_Dev %d\n", + parsec_device_cuda_enabled, dealer_device_index); + + starving_device_index = find_starving_device(dealer_device_index, parsec_device_cuda_enabled); + + if(starving_device_index == -1) + starving_device_index = dealer_device_index; + starving_gpu_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(starving_device_index); + + printf(" Starving_dev %d \n", starving_device_index); + + return starving_gpu_device; +} + + +/** + * This function migrate a specific task from a device a + * to another. + * + * Returns: negative number if any error occured. + * positive: starving device index. + */ +int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, + parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t *migrated_gpu_task) +{ + printf("TRIAL parsec_cuda_kernel_migrate \n"); + + int starving_device_index, dealer_device_index; + parsec_device_gpu_module_t* starving_gpu_device; + + dealer_device_index = dealer_device->super.device_index; + starving_device_index = find_starving_device(dealer_device_index, parsec_device_cuda_enabled); + + if(starving_device_index == -1) + return -1; + + /** + * @brief The distance value in normal parsec scheduler is laways positive. So a negative + * distance value can be used to communicate the device index of the staving node to the + * qpd scheduler. 
The distance is calucaled as distance = ( (starving device index) * -1 ) -1 + * + */ + __parsec_schedule(es, (parsec_task_t *) migrated_gpu_task, (starving_device_index * -1) - 1); + printf("Task migrated to device %d \n", starving_device_index); + + return starving_device_index; +} + diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h new file mode 100644 index 000000000..5ccfcb067 --- /dev/null +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -0,0 +1,36 @@ +#ifndef PARSEC_DEVICE_CUDA_MIGRATE_H +#define PARSEC_DEVICE_CUDA_MIGRATE_H + + +#include "parsec/parsec_config.h" +#include "parsec/parsec_internal.h" +#include "parsec/mca/device/cuda/device_cuda_internal.h" +#include "parsec/scheduling.h" +#include +#include + + +typedef struct parsec_device_cuda_info_s { + int task_count; + int load; + //parsec_atomic_lock_t lock; +} parsec_device_cuda_info_t; + +int parsec_cuda_migrate_init(int ndevices); +int parsec_cuda_get_device_load(int device); +int parsec_cuda_get_device_task(int device); +int parsec_cuda_set_device_load(int device, int load); +int parsec_cuda_set_device_task(int device, int task_count); +int is_starving(int device); +int find_starving_device(int dealer_device, int ndevice); +parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); +int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, + parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t *migrated_gpu_task); + + +#endif + + + + From 2ab59a5e884e862adc021ef6d7ca6fa1000bf7e2 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 19 Apr 2022 01:32:47 +1000 Subject: [PATCH 011/215] parsec_cuda_kernel_schedule() schedules a migrated task to a particular device queue. A device queue is created for each gpu device to hold the migrated tasks (migrated_task_list). The parsec_cuda_kernel_schedule() function schedules the migrated task to the correct queue. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 38 +++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index cfcdf11da..976a362ae 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -3,6 +3,7 @@ extern int parsec_device_cuda_enabled; static parsec_device_cuda_info_t* device_info; +static parsec_list_t** migrated_task_list; /** @@ -17,17 +18,19 @@ int parsec_cuda_migrate_init(int ndevices) { int i; cudaError_t cudastatus; - nvmlReturn_t nvml_ret; + //nvmlReturn_t nvml_ret; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); + migrated_task_list = (parsec_list_t**) calloc(ndevices, sizeof(parsec_list_t*)); for(i = 0; i < ndevices; i++) { device_info[i].task_count = 0; device_info[i].load = 0; + migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } - nvml_ret = nvmlInit_v2(); + //nvml_ret = nvmlInit_v2(); return 0; @@ -35,8 +38,16 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini(int ndevices) { + int i; + free(device_info); - nvmlShutdown(); + //nvmlShutdown(); + + for(i = 0; i < ndevices; i++) + { + PARSEC_OBJ_RELEASE(migrated_task_list[i]); + } + free(migrated_task_list); return 0; @@ -59,11 +70,11 @@ int parsec_cuda_get_device_load(int device) nvmlDevice_t nvml_device; nvmlUtilization_t nvml_utilization; - nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); - nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); - device_info[device].load = nvml_utilization.gpu; - - printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); + //nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); + //nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); + //device_info[device].load = nvml_utilization.gpu; +// + //printf("NVML Device Load GPU %d 
Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); return device_info[device].load; @@ -184,6 +195,15 @@ parsec_cuda_change_device( int dealer_device_index) } + +int parsec_cuda_kernel_schedule( parsec_execution_stream_t *es, + parsec_task_t *task, + int starving_device_index) +{ + parsec_list_t* li = migrated_task_list[starving_device_index]; + parsec_list_chain_sorted(li, task, parsec_execution_context_priority_comparator); +} + /** * This function migrate a specific task from a device a * to another. @@ -212,7 +232,7 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, * qpd scheduler. The distance is calucaled as distance = ( (starving device index) * -1 ) -1 * */ - __parsec_schedule(es, (parsec_task_t *) migrated_gpu_task, (starving_device_index * -1) - 1); + parsec_cuda_kernel_schedule(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); printf("Task migrated to device %d \n", starving_device_index); return starving_device_index; From 862bda2d40eceb2ab4e3fd48129b2b72af80275c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 19 Apr 2022 03:48:42 +1000 Subject: [PATCH 012/215] parsec_cuda_kernel_schedule() renamed as parsec_cuda_kernel_enqueue(). parsec_cuda_kernel_dequeue() schedules a migrated task to the gpu device. This function will be called in __parsec_context_wait() just before parsec_current_scheduler->module.select(). This will ensure that the migrated tasks will get priority over new tasks. When a compute thread calls this function, it is forced to try to be a manager of the device. If the device already has a manager, the compute thread passes the control of the task to the manager. If not the compute thread will become the manager. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 59 +++++++++++++++----- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 976a362ae..55e34cb5b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,6 +4,7 @@ extern int parsec_device_cuda_enabled; static parsec_device_cuda_info_t* device_info; static parsec_list_t** migrated_task_list; +static int NDEVICES; /** @@ -20,10 +21,11 @@ int parsec_cuda_migrate_init(int ndevices) cudaError_t cudastatus; //nvmlReturn_t nvml_ret; + NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); migrated_task_list = (parsec_list_t**) calloc(ndevices, sizeof(parsec_list_t*)); - for(i = 0; i < ndevices; i++) + for(i = 0; i < NDEVICES; i++) { device_info[i].task_count = 0; device_info[i].load = 0; @@ -36,14 +38,14 @@ int parsec_cuda_migrate_init(int ndevices) } -int parsec_cuda_migrate_fini(int ndevices) +int parsec_cuda_migrate_fini() { int i; free(device_info); //nvmlShutdown(); - for(i = 0; i < ndevices; i++) + for(i = 0; i < NDEVICES; i++) { PARSEC_OBJ_RELEASE(migrated_task_list[i]); } @@ -145,13 +147,13 @@ int is_starving(int device) * * TODO: needs updation */ -int find_starving_device(int dealer_device, int ndevice) +int find_starving_device(int dealer_device) { int i; - printf(" find_starving_device: Total_Dev %d Dealer_Dev %d\n", ndevice, dealer_device); + printf(" find_starving_device: Total_Dev %d Dealer_Dev %d\n", NDEVICES, dealer_device); // 0 device is the CPU, 1 is recursive - for(i = 2; i < (2 + ndevice); i++) + for(i = 2; i < (2 + NDEVICES); i++) { printf("Trying_Dev %d Dealer_Dev %d\n", i, dealer_device); if( i == dealer_device) @@ -196,12 +198,45 @@ parsec_cuda_change_device( int dealer_device_index) -int parsec_cuda_kernel_schedule( parsec_execution_stream_t *es, +int 
parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, parsec_task_t *task, int starving_device_index) { parsec_list_t* li = migrated_task_list[starving_device_index]; - parsec_list_chain_sorted(li, task, parsec_execution_context_priority_comparator); + parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); + + return 0; +} + +/** + * @brief This function will be called in __parsec_context_wait() just before + * parsec_current_scheduler->module.select(). This will ensure that the migrated tasks + * will get priority over new tasks. + * + * When a compute thread calls this function, it is forced to try to be a manager of the + * a device. If the device already has a manager, the compute thread passes the control of + * the task to the manager. If not the compute thread will become the manager. + * + * @param es + * @return int + */ + +int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) +{ + int i; + parsec_task_t * task = NULL; + parsec_list_t* li = NULL; + + for(i = 0; i < NDEVICES; i++) + { + li = migrated_task_list[i]; + task = (parsec_task_t*) parsec_list_pop_front(li); + if(task != NULL) + break; + } + + if(task != NULL) + parsec_cuda_kernel_scheduler(es, task, i); } /** @@ -226,13 +261,7 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, if(starving_device_index == -1) return -1; - /** - * @brief The distance value in normal parsec scheduler is laways positive. So a negative - * distance value can be used to communicate the device index of the staving node to the - * qpd scheduler. 
The distance is calucaled as distance = ( (starving device index) * -1 ) -1 - * - */ - parsec_cuda_kernel_schedule(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); + parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); printf("Task migrated to device %d \n", starving_device_index); return starving_device_index; From dc70f8d2d04109bda16ebca1afa5de24c9e0e92b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 22 Apr 2022 14:09:56 -0400 Subject: [PATCH 013/215] Migrate protocol: The function parsec_cuda_kernel_dequeue() will force a thread to be a manager thread, if there are any tasks migrated to a particular device. This will also ensure that a migrated task gets priority in execution when compared to a new task. Using migrate_if_starving(), the manager checks if there are starving devices. This checking is done before a new task is selected for execution. If there are any starving devices, the manager migrates tasks to the starving device, if there are available tasks to migrate.
--- parsec/build.ninja | 12320 ++++++++++++++++ .../contrib/build_with_parsec/CMakeLists.txt | 41 + parsec/contrib/build_with_parsec/Makefile | 32 + parsec/executed_tasks | 409 + parsec/mca/device/cuda/device_cuda_internal.h | 4 + parsec/mca/device/cuda/device_cuda_migrate.c | 60 +- parsec/mca/device/cuda/device_cuda_migrate.h | 6 +- parsec/mca/device/cuda/device_cuda_module.c | 13 + parsec/parsec-submodule-config.cmake | 103 + parsec/parsec/class/lifo-external.h | 198 + parsec/parsec/fortran/f2c_mangle.h | 16 + parsec/parsec/include/parsec.pc | 13 + parsec/parsec/include/parsec/parsec_config.h | 71 + parsec/parsec/include/parsec/parsec_options.h | 140 + parsec/parsec/mca/mca_static_components.h | 95 + parsec/scheduling.c | 14 +- parsec/tests/apps/stencil/loop_gen_1D | 16 + 17 files changed, 13535 insertions(+), 16 deletions(-) create mode 100644 parsec/build.ninja create mode 100644 parsec/contrib/build_with_parsec/CMakeLists.txt create mode 100644 parsec/contrib/build_with_parsec/Makefile create mode 100644 parsec/executed_tasks create mode 100644 parsec/parsec-submodule-config.cmake create mode 100644 parsec/parsec/class/lifo-external.h create mode 100644 parsec/parsec/fortran/f2c_mangle.h create mode 100644 parsec/parsec/include/parsec.pc create mode 100644 parsec/parsec/include/parsec/parsec_config.h create mode 100644 parsec/parsec/include/parsec/parsec_options.h create mode 100644 parsec/parsec/mca/mca_static_components.h create mode 100755 parsec/tests/apps/stencil/loop_gen_1D diff --git a/parsec/build.ninja b/parsec/build.ninja new file mode 100644 index 000000000..7231e88ea --- /dev/null +++ b/parsec/build.ninja @@ -0,0 +1,12320 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Ninja" Generator, CMake Version 3.20 + +# This file contains all the build statements describing the +# compilation DAG. 
+ +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# +# Which is the root file. +# ============================================================================= + +# ============================================================================= +# Project: PARSEC +# Configurations: RelWithDebInfo +# ============================================================================= + +############################################# +# Minimal version of Ninja required by this file + +ninja_required_version = 1.5 + + +############################################# +# Set configuration variable for custom commands. + +CONFIGURATION = RelWithDebInfo +# ============================================================================= +# Include auxiliary files. + + +############################################# +# Include rules file. + +include CMakeFiles/rules.ninja + + +############################################# +# Utility command for install/strip + +build CMakeFiles/install/strip.util: CUSTOM_COMMAND all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build install/strip: phony CMakeFiles/install/strip.util + + +############################################# +# Utility command for install + +build CMakeFiles/install.util: CUSTOM_COMMAND all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build install: phony CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build rebuild_cache: phony CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build edit_cache: phony CMakeFiles/edit_cache.util + + +############################################# +# Utility command for parsec_pregen_flex_bison + +build parsec_pregen_flex_bison: phony CMakeFiles/parsec_pregen_flex_bison parsec/interfaces/ptg/ptg-compiler/parsec_pregen_ptg parsec/parsec_pregen_flex_utils + + +############################################# +# Utility command for ContinuousSubmit + +build ContinuousSubmit: phony CMakeFiles/ContinuousSubmit + + +############################################# +# Utility command for ContinuousCoverage + +build ContinuousCoverage: phony CMakeFiles/ContinuousCoverage + + +############################################# +# Utility command for ContinuousTest + +build ContinuousTest: phony CMakeFiles/ContinuousTest + + +############################################# +# Utility command for ContinuousBuild + +build ContinuousBuild: phony CMakeFiles/ContinuousBuild + + +############################################# +# Utility command for ContinuousMemCheck + +build ContinuousMemCheck: phony CMakeFiles/ContinuousMemCheck + + +############################################# +# Utility command for Nightly + +build Nightly: phony CMakeFiles/Nightly + + +############################################# +# Utility command for package + +build CMakeFiles/package.util: CUSTOM_COMMAND all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build package: phony CMakeFiles/package.util + + +############################################# +# Utility command for NightlyTest + +build NightlyTest: phony CMakeFiles/NightlyTest + + +############################################# +# Utility command for NightlyUpdate + +build NightlyUpdate: phony CMakeFiles/NightlyUpdate + + +############################################# +# Utility command for NightlyBuild + +build NightlyBuild: phony CMakeFiles/NightlyBuild + + +############################################# +# Utility command for install/local + +build CMakeFiles/install/local.util: CUSTOM_COMMAND all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build install/local: phony CMakeFiles/install/local.util + + +############################################# +# Utility command for Continuous + +build Continuous: phony CMakeFiles/Continuous + + +############################################# +# Utility command for NightlyStart + +build NightlyStart: phony CMakeFiles/NightlyStart + + +############################################# +# Utility command for NightlyMemoryCheck + +build NightlyMemoryCheck: phony CMakeFiles/NightlyMemoryCheck + + +############################################# +# Utility command for NightlyMemCheck + +build NightlyMemCheck: phony CMakeFiles/NightlyMemCheck + + +############################################# +# Utility command for package_source + +build CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build package_source: phony CMakeFiles/package_source.util + + +############################################# +# Utility command for ExperimentalStart + +build ExperimentalStart: phony CMakeFiles/ExperimentalStart + + +############################################# +# Utility command for ContinuousConfigure + +build ContinuousConfigure: phony CMakeFiles/ContinuousConfigure + + +############################################# +# Utility command for NightlyCoverage + +build NightlyCoverage: phony CMakeFiles/NightlyCoverage + + +############################################# +# Utility command for ExperimentalUpdate + +build ExperimentalUpdate: phony CMakeFiles/ExperimentalUpdate + + +############################################# +# Utility command for test + +build CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build test: phony CMakeFiles/test.util + + +############################################# +# Utility command for build_with_parsec + +build build_with_parsec: phony CMakeFiles/build_with_parsec contrib/build_with_parsec/dtd_test_allreduce.c contrib/build_with_parsec/write_check.cu contrib/build_with_parsec/write_check.jdf + + +############################################# +# Utility command for ExperimentalConfigure + +build ExperimentalConfigure: phony CMakeFiles/ExperimentalConfigure + + +############################################# +# Utility command for ExperimentalCoverage + +build ExperimentalCoverage: phony CMakeFiles/ExperimentalCoverage + + +############################################# +# Utility command for ExperimentalBuild + +build ExperimentalBuild: phony CMakeFiles/ExperimentalBuild + + +############################################# +# Utility command for NightlyConfigure + +build NightlyConfigure: phony CMakeFiles/NightlyConfigure + + +############################################# +# Utility command for ExperimentalTest + +build ExperimentalTest: phony CMakeFiles/ExperimentalTest + + +############################################# +# Utility command for ExperimentalMemCheck + +build ExperimentalMemCheck: phony CMakeFiles/ExperimentalMemCheck + + +############################################# +# Utility command for Experimental + +build Experimental: phony CMakeFiles/Experimental + + +############################################# +# Utility command for NightlySubmit + +build NightlySubmit: phony CMakeFiles/NightlySubmit + + +############################################# +# Utility command for ExperimentalSubmit + +build ExperimentalSubmit: phony CMakeFiles/ExperimentalSubmit + + +############################################# +# Utility command for ContinuousStart + +build ContinuousStart: phony CMakeFiles/ContinuousStart + + +############################################# +# Utility command for ContinuousUpdate + 
+build ContinuousUpdate: phony CMakeFiles/ContinuousUpdate + + +############################################# +# Custom command for CMakeFiles/parsec_pregen_flex_bison + +build CMakeFiles/parsec_pregen_flex_bison: CUSTOM_COMMAND || parsec/interfaces/ptg/ptg-compiler/parsec_pregen_ptg parsec/parsec_pregen_flex_utils + COMMAND = cd /home/joseph/parsec/parsec/contrib/pregen_flex_bison && cmake -Dsrcdir=/home/joseph/parsec -Dbuilddir=/home/joseph/parsec/parsec -Darchive=/home/joseph/parsec/contrib/pregen_flex_bison.tar -P /home/joseph/parsec/cmake_modules/pregen_flex_bison.cmake + + +############################################# +# Custom command for CMakeFiles/ContinuousSubmit + +build CMakeFiles/ContinuousSubmit: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousSubmit + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousCoverage + +build CMakeFiles/ContinuousCoverage: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousCoverage + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousTest + +build CMakeFiles/ContinuousTest: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousTest + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousBuild + +build CMakeFiles/ContinuousBuild: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousBuild + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousMemCheck + +build CMakeFiles/ContinuousMemCheck: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec 
&& /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousMemCheck + pool = console + + +############################################# +# Custom command for CMakeFiles/Nightly + +build CMakeFiles/Nightly: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D Nightly + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyTest + +build CMakeFiles/NightlyTest: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyTest + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyUpdate + +build CMakeFiles/NightlyUpdate: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyUpdate + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyBuild + +build CMakeFiles/NightlyBuild: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyBuild + pool = console + + +############################################# +# Custom command for CMakeFiles/Continuous + +build CMakeFiles/Continuous: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D Continuous + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyStart + +build CMakeFiles/NightlyStart: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyStart + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyMemoryCheck + +build CMakeFiles/NightlyMemoryCheck: 
CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyMemoryCheck + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyMemCheck + +build CMakeFiles/NightlyMemCheck: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyMemCheck + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalStart + +build CMakeFiles/ExperimentalStart: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalStart + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousConfigure + +build CMakeFiles/ContinuousConfigure: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousConfigure + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyCoverage + +build CMakeFiles/NightlyCoverage: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyCoverage + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalUpdate + +build CMakeFiles/ExperimentalUpdate: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalUpdate + pool = console + + +############################################# +# Phony custom command for CMakeFiles/build_with_parsec + +build CMakeFiles/build_with_parsec: phony contrib/build_with_parsec/dtd_test_allreduce.c contrib/build_with_parsec/write_check.cu ../contrib/build_with_parsec/write_check.jdf + + 
+############################################# +# Custom command for contrib/build_with_parsec/dtd_test_allreduce.c + +build contrib/build_with_parsec/dtd_test_allreduce.c: CUSTOM_COMMAND ../contrib/build_with_parsec/dtd_test_allreduce.c + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E copy /home/joseph/parsec/contrib/build_with_parsec/dtd_test_allreduce.c /home/joseph/parsec/parsec/contrib/build_with_parsec/dtd_test_allreduce.c + DESC = Generating contrib/build_with_parsec/dtd_test_allreduce.c + restat = 1 + + +############################################# +# Custom command for contrib/build_with_parsec/write_check.cu + +build contrib/build_with_parsec/write_check.cu: CUSTOM_COMMAND ../contrib/build_with_parsec/write_check.cu + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E copy /home/joseph/parsec/contrib/build_with_parsec/write_check.cu /home/joseph/parsec/parsec/contrib/build_with_parsec/write_check.cu + DESC = Generating contrib/build_with_parsec/write_check.cu + restat = 1 + + +############################################# +# Custom command for contrib/build_with_parsec/write_check.jdf + +build contrib/build_with_parsec/write_check.jdf: CUSTOM_COMMAND ../contrib/build_with_parsec/write_check.jdf + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E copy /home/joseph/parsec/contrib/build_with_parsec/write_check.jdf /home/joseph/parsec/parsec/contrib/build_with_parsec/write_check.jdf + DESC = Generating contrib/build_with_parsec/write_check.jdf + restat = 1 + + +############################################# +# Custom command for CMakeFiles/ExperimentalConfigure + +build CMakeFiles/ExperimentalConfigure: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalConfigure + pool = 
console + + +############################################# +# Custom command for CMakeFiles/ExperimentalCoverage + +build CMakeFiles/ExperimentalCoverage: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalCoverage + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalBuild + +build CMakeFiles/ExperimentalBuild: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalBuild + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlyConfigure + +build CMakeFiles/NightlyConfigure: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlyConfigure + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalTest + +build CMakeFiles/ExperimentalTest: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalTest + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalMemCheck + +build CMakeFiles/ExperimentalMemCheck: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalMemCheck + pool = console + + +############################################# +# Custom command for CMakeFiles/Experimental + +build CMakeFiles/Experimental: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D Experimental + pool = console + + +############################################# +# Custom command for CMakeFiles/NightlySubmit + +build CMakeFiles/NightlySubmit: 
CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D NightlySubmit + pool = console + + +############################################# +# Custom command for CMakeFiles/ExperimentalSubmit + +build CMakeFiles/ExperimentalSubmit: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ExperimentalSubmit + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousStart + +build CMakeFiles/ContinuousStart: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousStart + pool = console + + +############################################# +# Custom command for CMakeFiles/ContinuousUpdate + +build CMakeFiles/ContinuousUpdate: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest -D ContinuousUpdate + pool = console + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tools/CMakeFiles/install/strip.util: CUSTOM_COMMAND tools/all + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tools/install/strip: phony tools/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tools/CMakeFiles/install/local.util: CUSTOM_COMMAND tools/all + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tools/install/local: phony tools/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tools/CMakeFiles/install.util: CUSTOM_COMMAND tools/all + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tools/install: phony tools/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tools/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tools/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tools/rebuild_cache: phony tools/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tools/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 
+ DESC = No interactive CMake dialog available... + restat = 1 + +build tools/edit_cache: phony tools/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tools/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tools/package_source: phony tools/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tools/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tools/test: phony tools/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build tools/CMakeFiles/package.util: CUSTOM_COMMAND tools/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tools/package: phony tools/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tools/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tools/profiling/CMakeFiles/install/strip.util: CUSTOM_COMMAND tools/profiling/all + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tools/profiling/install/strip: phony tools/profiling/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tools/profiling/CMakeFiles/install/local.util: CUSTOM_COMMAND tools/profiling/all + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tools/profiling/install/local: phony tools/profiling/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tools/profiling/CMakeFiles/install.util: CUSTOM_COMMAND tools/profiling/all + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tools/profiling/install: phony tools/profiling/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tools/profiling/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tools/profiling/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tools/profiling/rebuild_cache: phony tools/profiling/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tools/profiling/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tools/profiling/edit_cache: phony tools/profiling/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tools/profiling/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tools/profiling/package_source: phony tools/profiling/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tools/profiling/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/profiling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tools/profiling/test: phony tools/profiling/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build tools/profiling/CMakeFiles/package.util: CUSTOM_COMMAND tools/profiling/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tools/profiling/package: phony tools/profiling/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tools/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tools/aggregator_visu/CMakeFiles/install/strip.util: CUSTOM_COMMAND tools/aggregator_visu/all + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tools/aggregator_visu/install/strip: phony tools/aggregator_visu/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tools/aggregator_visu/CMakeFiles/install/local.util: CUSTOM_COMMAND tools/aggregator_visu/all + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tools/aggregator_visu/install/local: phony tools/aggregator_visu/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tools/aggregator_visu/CMakeFiles/install.util: CUSTOM_COMMAND tools/aggregator_visu/all + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tools/aggregator_visu/install: phony tools/aggregator_visu/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tools/aggregator_visu/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tools/aggregator_visu/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tools/aggregator_visu/rebuild_cache: phony tools/aggregator_visu/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tools/aggregator_visu/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tools/aggregator_visu/edit_cache: phony tools/aggregator_visu/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tools/aggregator_visu/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tools/aggregator_visu/package_source: phony tools/aggregator_visu/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tools/aggregator_visu/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tools/aggregator_visu && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build tools/aggregator_visu/test: phony tools/aggregator_visu/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build tools/aggregator_visu/CMakeFiles/package.util: CUSTOM_COMMAND tools/aggregator_visu/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tools/aggregator_visu/package: phony tools/aggregator_visu/CMakeFiles/package.util + +# ============================================================================= +# Object build statements for EXECUTABLE target parsec-reader + + +############################################# +# Order-only phony target for parsec-reader + +build cmake_object_order_depends_target_parsec-reader: phony || tools/aggregator_visu/CMakeFiles/parsec-reader.dir + +build tools/aggregator_visu/CMakeFiles/parsec-reader.dir/reader.c.o: C_COMPILER__parsec-reader_RelWithDebInfo ../tools/aggregator_visu/reader.c || cmake_object_order_depends_target_parsec-reader + DEFINES = -D_GNU_SOURCE + DEP_FILE = tools/aggregator_visu/CMakeFiles/parsec-reader.dir/reader.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I/usr/include/libxml2 + OBJECT_DIR = tools/aggregator_visu/CMakeFiles/parsec-reader.dir + OBJECT_FILE_DIR = tools/aggregator_visu/CMakeFiles/parsec-reader.dir + TARGET_COMPILE_PDB = tools/aggregator_visu/CMakeFiles/parsec-reader.dir/ + TARGET_PDB = tools/aggregator_visu/parsec-reader.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target parsec-reader + + +############################################# +# Link the executable tools/aggregator_visu/parsec-reader + +build tools/aggregator_visu/parsec-reader: C_EXECUTABLE_LINKER__parsec-reader_RelWithDebInfo tools/aggregator_visu/CMakeFiles/parsec-reader.dir/reader.c.o | /usr/lib/x86_64-linux-gnu/libxml2.so /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so + FLAGS = -O2 -g -DNDEBUG + LINK_LIBRARIES = -Wl,-rpath,/usr/local/lib /usr/lib/x86_64-linux-gnu/libxml2.so -lrt -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -lm -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -pthread + OBJECT_DIR = tools/aggregator_visu/CMakeFiles/parsec-reader.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tools/aggregator_visu/CMakeFiles/parsec-reader.dir/ + TARGET_FILE = tools/aggregator_visu/parsec-reader + TARGET_PDB = tools/aggregator_visu/parsec-reader.pdb + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build 
parsec/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/all + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build parsec/install/local: phony parsec/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build parsec/CMakeFiles/install.util: CUSTOM_COMMAND parsec/all + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build parsec/install: phony parsec/CMakeFiles/install.util + + +############################################# +# Utility command for parsec_pregen_flex_utils + +build parsec/parsec_pregen_flex_utils: phony contrib/pregen_flex_bison/parsec/utils/keyval_lex.l.c contrib/pregen_flex_bison/parsec/utils/show_help_lex.l.c + +# ============================================================================= +# Object build statements for SHARED_LIBRARY target parsec + + +############################################# +# Order-only phony target for parsec + +build cmake_object_order_depends_target_parsec: phony || parsec/data_dist/matrix/ptgpp_parsec.apply parsec/data_dist/matrix/ptgpp_parsec.diag_band_to_rect parsec/data_dist/matrix/ptgpp_parsec.reduce parsec/data_dist/matrix/ptgpp_parsec.reduce_col parsec/data_dist/matrix/ptgpp_parsec.reduce_row parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute_reshuffle parsec/utils/keyval_lex.l.c parsec/utils/show_help_lex.l.c + +build parsec/CMakeFiles/parsec.dir/class/parsec_dequeue.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_dequeue.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC 
-DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_dequeue.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_fifo.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_fifo.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_fifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_lifo.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_lifo.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_lifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_list.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_list.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_list.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_object.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_object.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_object.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_value_array.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_value_array.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_value_array.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_hash_table.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_hash_table.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_hash_table.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_rwlock.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_rwlock.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_rwlock.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_future.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_future.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_future.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/parsec_datacopy_future.c.o: C_COMPILER__parsec_RelWithDebInfo class/parsec_datacopy_future.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/parsec_datacopy_future.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/class/info.c.o: C_COMPILER__parsec_RelWithDebInfo class/info.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/class/info.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/argv.c.o: C_COMPILER__parsec_RelWithDebInfo utils/argv.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/argv.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/cmd_line.c.o: C_COMPILER__parsec_RelWithDebInfo utils/cmd_line.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/cmd_line.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/colors.c.o: C_COMPILER__parsec_RelWithDebInfo utils/colors.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/colors.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/parsec_environ.c.o: C_COMPILER__parsec_RelWithDebInfo utils/parsec_environ.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/parsec_environ.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/installdirs.c.o: C_COMPILER__parsec_RelWithDebInfo utils/installdirs.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/installdirs.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/keyval_parse.c.o: C_COMPILER__parsec_RelWithDebInfo utils/keyval_parse.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/keyval_parse.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/mca_param.c.o: C_COMPILER__parsec_RelWithDebInfo utils/mca_param.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/mca_param.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/mca_param_cmd_line.c.o: C_COMPILER__parsec_RelWithDebInfo utils/mca_param_cmd_line.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/mca_param_cmd_line.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/mca_parse_paramfile.c.o: C_COMPILER__parsec_RelWithDebInfo utils/mca_parse_paramfile.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/mca_parse_paramfile.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/os_path.c.o: C_COMPILER__parsec_RelWithDebInfo utils/os_path.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/os_path.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/output.c.o: C_COMPILER__parsec_RelWithDebInfo utils/output.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/output.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/show_help.c.o: C_COMPILER__parsec_RelWithDebInfo utils/show_help.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/show_help.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/zone_malloc.c.o: C_COMPILER__parsec_RelWithDebInfo utils/zone_malloc.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/zone_malloc.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/atomic_external.c.o: C_COMPILER__parsec_RelWithDebInfo utils/atomic_external.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/atomic_external.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/debug.c.o: C_COMPILER__parsec_RelWithDebInfo utils/debug.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/debug.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/win_compat.c.o: C_COMPILER__parsec_RelWithDebInfo utils/win_compat.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/win_compat.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/keyval_lex.l.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/utils/keyval_lex.l.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/keyval_lex.l.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/utils/show_help_lex.l.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/utils/show_help_lex.l.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/utils/show_help_lex.l.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/arena.c.o: C_COMPILER__parsec_RelWithDebInfo arena.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/arena.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/barrier.c.o: C_COMPILER__parsec_RelWithDebInfo barrier.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/barrier.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/bindthread.c.o: C_COMPILER__parsec_RelWithDebInfo bindthread.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/bindthread.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/parsec.c.o: C_COMPILER__parsec_RelWithDebInfo parsec.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/parsec.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/parsec_reshape.c.o: C_COMPILER__parsec_RelWithDebInfo parsec_reshape.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/parsec_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data.c.o: C_COMPILER__parsec_RelWithDebInfo data.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_distribution.c.o: C_COMPILER__parsec_RelWithDebInfo data_distribution.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_distribution.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/debug_marks.c.o: C_COMPILER__parsec_RelWithDebInfo debug_marks.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/debug_marks.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/mca_repository.c.o: C_COMPILER__parsec_RelWithDebInfo mca/mca_repository.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/mca_repository.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mempool.c.o: C_COMPILER__parsec_RelWithDebInfo mempool.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mempool.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/private_mempool.c.o: C_COMPILER__parsec_RelWithDebInfo private_mempool.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/private_mempool.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/remote_dep.c.o: C_COMPILER__parsec_RelWithDebInfo remote_dep.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/remote_dep.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/scheduling.c.o: C_COMPILER__parsec_RelWithDebInfo scheduling.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/scheduling.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/compound.c.o: C_COMPILER__parsec_RelWithDebInfo compound.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/compound.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/vpmap.c.o: C_COMPILER__parsec_RelWithDebInfo vpmap.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/vpmap.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/maxheap.c.o: C_COMPILER__parsec_RelWithDebInfo maxheap.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/maxheap.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/hbbuffer.c.o: C_COMPILER__parsec_RelWithDebInfo hbbuffer.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/hbbuffer.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/datarepo.c.o: C_COMPILER__parsec_RelWithDebInfo datarepo.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/datarepo.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/datatype/datatype_mpi.c.o: C_COMPILER__parsec_RelWithDebInfo datatype/datatype_mpi.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/datatype/datatype_mpi.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/datatype + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/parsec_hwloc.c.o: C_COMPILER__parsec_RelWithDebInfo parsec_hwloc.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/parsec_hwloc.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/profiling.c.o: C_COMPILER__parsec_RelWithDebInfo profiling.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/profiling.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/interfaces/interface.c.o: C_COMPILER__parsec_RelWithDebInfo interfaces/interface.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/interfaces/interface.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/interfaces + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/interfaces/dtd/parsec_dtd_data_flush.c.o: C_COMPILER__parsec_RelWithDebInfo interfaces/dtd/parsec_dtd_data_flush.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/interfaces/dtd/parsec_dtd_data_flush.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/interfaces/dtd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/interfaces/dtd/overlap_strategies.c.o: C_COMPILER__parsec_RelWithDebInfo interfaces/dtd/overlap_strategies.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/interfaces/dtd/overlap_strategies.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/interfaces/dtd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/interfaces/dtd/insert_function.c.o: C_COMPILER__parsec_RelWithDebInfo interfaces/dtd/insert_function.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/interfaces/dtd/insert_function.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/interfaces/dtd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/device/device.c.o: C_COMPILER__parsec_RelWithDebInfo mca/device/device.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/device/device.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/device + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/pins.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/pins.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/pins.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/pins_init.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/pins_init.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/pins_init.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/iterators_checker/pins_iterators_checker_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/iterators_checker/pins_iterators_checker_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/print_steals/pins_print_steals_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/print_steals + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/print_steals/pins_print_steals_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/print_steals + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/ptg_to_dtd/pins_ptg_to_dtd_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/ptg_to_dtd/pins_ptg_to_dtd_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/task_granularity/pins_task_granularity_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/pins/task_granularity/pins_task_granularity_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ap/sched_ap_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ap + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ap/sched_ap_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ap + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/gd/sched_gd_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/gd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/gd/sched_gd_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/gd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ip/sched_ip_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ip + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ip/sched_ip_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ip + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/lfq/sched_lfq_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/lfq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/lfq/sched_lfq_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/lfq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/lhq/sched_lhq_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/lhq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/lhq/sched_lhq_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/lhq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ll/sched_ll_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ll + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ll/sched_ll_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ll + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ltq/sched_ltq_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ltq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/ltq/sched_ltq_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/ltq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/pbq/sched_pbq_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/pbq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/pbq/sched_pbq_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/pbq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/rnd/sched_rnd_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/rnd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/rnd/sched_rnd_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/rnd + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_component.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/spq/sched_spq_component.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_component.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/spq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_module.c.o: C_COMPILER__parsec_RelWithDebInfo mca/sched/spq/sched_spq_module.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_module.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/mca/sched/spq + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_col.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/reduce_col.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_col.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_row.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/reduce_row.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_row.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/reduce.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/diag_band_to_rect.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/diag_band_to_rect.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/diag_band_to_rect.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/apply.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrix.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/matrix.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrix.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrixtypes.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/matrixtypes.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrixtypes.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/map_operator.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/map_operator.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/map_operator.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_tabular.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/two_dim_tabular.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_tabular.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/grid_2Dcyclic.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/grid_2Dcyclic.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/grid_2Dcyclic.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/two_dim_rectangle_cyclic.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic_band.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/two_dim_rectangle_cyclic_band.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic_band.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/sym_two_dim_rectangle_cyclic.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic_band.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/sym_two_dim_rectangle_cyclic_band.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic_band.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/vector_two_dim_cyclic.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/vector_two_dim_cyclic.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/vector_two_dim_cyclic.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/subtile.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/subtile.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/subtile.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_wrapper.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/reduce_wrapper.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply_wrapper.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/apply_wrapper.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_dtd.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/redistribute/redistribute_dtd.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_dtd.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_wrapper.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/matrix/redistribute/redistribute_wrapper.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/redistribute/redistribute.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_reshuffle.c.o: C_COMPILER__parsec_RelWithDebInfo parsec/data_dist/matrix/redistribute/redistribute_reshuffle.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_reshuffle.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/data_dist/hash_datadist.c.o: C_COMPILER__parsec_RelWithDebInfo data_dist/hash_datadist.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/data_dist/hash_datadist.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/data_dist + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90-pp.f90 | parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o.ddi: Fortran_PREPROCESS_SCAN__parsec_RelWithDebInfo fortran/parsecf.F90 || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90-pp.f90.d + DYNDEP_INTERMEDIATE_FILE = parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o.ddi + FLAGS = -O2 -g -DNDEBUG -Jparsec/include/fortran -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute -I/usr/include -I/usr/local/include + OBJ_FILE = parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o + +build parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o: Fortran_COMPILER__parsec_RelWithDebInfo parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90-pp.f90 || parsec/CMakeFiles/parsec.dir/Fortran.dd + FLAGS = -O2 -g -DNDEBUG -Jparsec/include/fortran -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -fpreprocessed + INCLUDES = -Ifortran -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute -I/usr/include -I/usr/local/include + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/fortran + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + dyndep = parsec/CMakeFiles/parsec.dir/Fortran.dd + +build parsec/CMakeFiles/parsec.dir/fortran/parsecf.c.o: C_COMPILER__parsec_RelWithDebInfo fortran/parsecf.c || cmake_object_order_depends_target_parsec + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE -Dparsec_EXPORTS + DEP_FILE = parsec/CMakeFiles/parsec.dir/fortran/parsecf.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/data_dist/matrix -Iparsec/data_dist/matrix/redistribute + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec.dir/fortran + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_PDB = parsec/libparsec.pdb + +build parsec/CMakeFiles/parsec.dir/Fortran.dd: Fortran_DYNDEP__parsec_RelWithDebInfo parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o.ddi || parsec/data_dist/matrix/ptgpp_parsec.apply parsec/data_dist/matrix/ptgpp_parsec.diag_band_to_rect parsec/data_dist/matrix/ptgpp_parsec.reduce parsec/data_dist/matrix/ptgpp_parsec.reduce_col parsec/data_dist/matrix/ptgpp_parsec.reduce_row parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute_reshuffle + + +# ============================================================================= +# Link build statements for SHARED_LIBRARY target parsec + + +############################################# +# Link the shared library parsec/libparsec.so.4.0.0 + +build parsec/libparsec.so.4.0.0: Fortran_SHARED_LIBRARY_LINKER__parsec_RelWithDebInfo parsec/CMakeFiles/parsec.dir/class/parsec_dequeue.c.o 
parsec/CMakeFiles/parsec.dir/class/parsec_fifo.c.o parsec/CMakeFiles/parsec.dir/class/parsec_lifo.c.o parsec/CMakeFiles/parsec.dir/class/parsec_list.c.o parsec/CMakeFiles/parsec.dir/class/parsec_object.c.o parsec/CMakeFiles/parsec.dir/class/parsec_value_array.c.o parsec/CMakeFiles/parsec.dir/class/parsec_hash_table.c.o parsec/CMakeFiles/parsec.dir/class/parsec_rwlock.c.o parsec/CMakeFiles/parsec.dir/class/parsec_future.c.o parsec/CMakeFiles/parsec.dir/class/parsec_datacopy_future.c.o parsec/CMakeFiles/parsec.dir/class/info.c.o parsec/CMakeFiles/parsec.dir/utils/argv.c.o parsec/CMakeFiles/parsec.dir/utils/cmd_line.c.o parsec/CMakeFiles/parsec.dir/utils/colors.c.o parsec/CMakeFiles/parsec.dir/utils/parsec_environ.c.o parsec/CMakeFiles/parsec.dir/utils/installdirs.c.o parsec/CMakeFiles/parsec.dir/utils/keyval_parse.c.o parsec/CMakeFiles/parsec.dir/utils/mca_param.c.o parsec/CMakeFiles/parsec.dir/utils/mca_param_cmd_line.c.o parsec/CMakeFiles/parsec.dir/utils/mca_parse_paramfile.c.o parsec/CMakeFiles/parsec.dir/utils/os_path.c.o parsec/CMakeFiles/parsec.dir/utils/output.c.o parsec/CMakeFiles/parsec.dir/utils/show_help.c.o parsec/CMakeFiles/parsec.dir/utils/zone_malloc.c.o parsec/CMakeFiles/parsec.dir/utils/atomic_external.c.o parsec/CMakeFiles/parsec.dir/utils/debug.c.o parsec/CMakeFiles/parsec.dir/utils/win_compat.c.o parsec/CMakeFiles/parsec.dir/utils/keyval_lex.l.c.o parsec/CMakeFiles/parsec.dir/utils/show_help_lex.l.c.o parsec/CMakeFiles/parsec.dir/arena.c.o parsec/CMakeFiles/parsec.dir/barrier.c.o parsec/CMakeFiles/parsec.dir/bindthread.c.o parsec/CMakeFiles/parsec.dir/parsec.c.o parsec/CMakeFiles/parsec.dir/parsec_reshape.c.o parsec/CMakeFiles/parsec.dir/data.c.o parsec/CMakeFiles/parsec.dir/data_distribution.c.o parsec/CMakeFiles/parsec.dir/debug_marks.c.o parsec/CMakeFiles/parsec.dir/mca/mca_repository.c.o parsec/CMakeFiles/parsec.dir/mempool.c.o parsec/CMakeFiles/parsec.dir/private_mempool.c.o parsec/CMakeFiles/parsec.dir/remote_dep.c.o 
parsec/CMakeFiles/parsec.dir/scheduling.c.o parsec/CMakeFiles/parsec.dir/compound.c.o parsec/CMakeFiles/parsec.dir/vpmap.c.o parsec/CMakeFiles/parsec.dir/maxheap.c.o parsec/CMakeFiles/parsec.dir/hbbuffer.c.o parsec/CMakeFiles/parsec.dir/datarepo.c.o parsec/CMakeFiles/parsec.dir/datatype/datatype_mpi.c.o parsec/CMakeFiles/parsec.dir/parsec_hwloc.c.o parsec/CMakeFiles/parsec.dir/profiling.c.o parsec/CMakeFiles/parsec.dir/interfaces/interface.c.o parsec/CMakeFiles/parsec.dir/interfaces/dtd/parsec_dtd_data_flush.c.o parsec/CMakeFiles/parsec.dir/interfaces/dtd/overlap_strategies.c.o parsec/CMakeFiles/parsec.dir/interfaces/dtd/insert_function.c.o parsec/CMakeFiles/parsec.dir/mca/device/device.c.o parsec/CMakeFiles/parsec.dir/mca/pins/pins.c.o parsec/CMakeFiles/parsec.dir/mca/pins/pins_init.c.o parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_component.c.o parsec/CMakeFiles/parsec.dir/mca/pins/iterators_checker/pins_iterators_checker_module.c.o parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_component.c.o parsec/CMakeFiles/parsec.dir/mca/pins/print_steals/pins_print_steals_module.c.o parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_component.c.o parsec/CMakeFiles/parsec.dir/mca/pins/ptg_to_dtd/pins_ptg_to_dtd_module.c.o parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_component.c.o parsec/CMakeFiles/parsec.dir/mca/pins/task_granularity/pins_task_granularity_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ap/sched_ap_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/gd/sched_gd_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ip/sched_ip_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/lfq/sched_lfq_module.c.o 
parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/lhq/sched_lhq_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ll/sched_ll_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/ltq/sched_ltq_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/pbq/sched_pbq_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/rnd/sched_rnd_module.c.o parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_component.c.o parsec/CMakeFiles/parsec.dir/mca/sched/spq/sched_spq_module.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_col.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_row.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/diag_band_to_rect.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrix.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/matrixtypes.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/map_operator.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_tabular.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/grid_2Dcyclic.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/two_dim_rectangle_cyclic_band.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/sym_two_dim_rectangle_cyclic_band.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/vector_two_dim_cyclic.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/subtile.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/reduce_wrapper.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/apply_wrapper.c.o 
parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_dtd.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_wrapper.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute.c.o parsec/CMakeFiles/parsec.dir/data_dist/matrix/redistribute/redistribute_reshuffle.c.o parsec/CMakeFiles/parsec.dir/data_dist/hash_datadist.c.o parsec/CMakeFiles/parsec.dir/fortran/parsecf.F90.o parsec/CMakeFiles/parsec.dir/fortran/parsecf.c.o | /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so /usr/lib/x86_64-linux-gnu/libhwloc.so /usr/local/lib/libmpi.so || parsec/data_dist/matrix/ptgpp_parsec.apply parsec/data_dist/matrix/ptgpp_parsec.diag_band_to_rect parsec/data_dist/matrix/ptgpp_parsec.reduce parsec/data_dist/matrix/ptgpp_parsec.reduce_col parsec/data_dist/matrix/ptgpp_parsec.reduce_row parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute_reshuffle + LANGUAGE_COMPILE_FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/usr/local/lib:::::::::::::::::::::::::::::::::::::: -lrt -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -pthread -lm -lquadmath -lc -latomic /usr/lib/x86_64-linux-gnu/libhwloc.so /usr/local/lib/libmpi.so + OBJECT_DIR = parsec/CMakeFiles/parsec.dir + POST_BUILD = : + PRE_LINK = : + SONAME = libparsec.so.4 + SONAME_FLAG = -Wl,-soname, + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec.dir/ + TARGET_FILE = parsec/libparsec.so.4.0.0 + TARGET_PDB = parsec/libparsec.pdb + + +############################################# +# Create library symlink parsec/libparsec.so + +build parsec/libparsec.so.4 parsec/libparsec.so: CMAKE_SYMLINK_LIBRARY 
parsec/libparsec.so.4.0.0 + POST_BUILD = : + +# ============================================================================= +# Object build statements for STATIC_LIBRARY target parsec-base + + +############################################# +# Order-only phony target for parsec-base + +build cmake_object_order_depends_target_parsec-base: phony || cmake_object_order_depends_target_parsec-base-obj + + +# ============================================================================= +# Link build statements for STATIC_LIBRARY target parsec-base + + +############################################# +# Link the static library parsec/libparsec-base.a + +build parsec/libparsec-base.a: C_STATIC_LIBRARY_LINKER__parsec-base_RelWithDebInfo parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_dequeue.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_fifo.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_lifo.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_list.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_object.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_value_array.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_hash_table.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_rwlock.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_future.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_datacopy_future.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/info.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/argv.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/cmd_line.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/colors.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/parsec_environ.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/installdirs.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_parse.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param_cmd_line.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_parse_paramfile.c.o 
parsec/CMakeFiles/parsec-base-obj.dir/utils/os_path.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/output.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/zone_malloc.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/atomic_external.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/debug.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/win_compat.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_lex.l.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help_lex.l.c.o || parsec/parsec-base-obj + LANGUAGE_COMPILE_FLAGS = -O2 -g -DNDEBUG + OBJECT_DIR = parsec/CMakeFiles/parsec-base.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base.dir/parsec-base.pdb + TARGET_FILE = parsec/libparsec-base.a + TARGET_PDB = parsec/libparsec-base.pdb + + +############################################# +# Utility command for package + +build parsec/CMakeFiles/package.util: CUSTOM_COMMAND parsec/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build parsec/package: phony parsec/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build parsec/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build parsec/test: phony parsec/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build parsec/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build parsec/package_source: phony parsec/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build parsec/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/edit_cache: phony parsec/CMakeFiles/edit_cache.util + +# ============================================================================= +# Object build statements for OBJECT_LIBRARY target parsec-base-obj + + +############################################# +# Order-only phony target for parsec-base-obj + +build cmake_object_order_depends_target_parsec-base-obj: phony || parsec/utils/keyval_lex.l.c parsec/utils/show_help_lex.l.c + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_dequeue.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_dequeue.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_dequeue.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_fifo.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_fifo.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_fifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_lifo.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_lifo.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_lifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_list.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_list.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_list.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_object.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_object.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_object.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_value_array.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_value_array.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_value_array.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_hash_table.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_hash_table.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_hash_table.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_rwlock.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_rwlock.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_rwlock.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_future.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_future.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_future.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_datacopy_future.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/parsec_datacopy_future.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_datacopy_future.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/class/info.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo class/info.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/class/info.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/class + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/argv.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/argv.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/argv.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/cmd_line.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/cmd_line.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/cmd_line.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/colors.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/colors.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/colors.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/parsec_environ.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/parsec_environ.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/parsec_environ.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/installdirs.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/installdirs.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/installdirs.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_parse.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/keyval_parse.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_parse.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/mca_param.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param_cmd_line.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/mca_param_cmd_line.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param_cmd_line.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_parse_paramfile.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/mca_parse_paramfile.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_parse_paramfile.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/os_path.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/os_path.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/os_path.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/output.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/output.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/output.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/show_help.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/zone_malloc.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/zone_malloc.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/zone_malloc.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/atomic_external.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/atomic_external.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/atomic_external.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/debug.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/debug.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/debug.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/win_compat.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo utils/win_compat.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/win_compat.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_lex.l.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo parsec/utils/keyval_lex.l.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_lex.l.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + +build parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help_lex.l.c.o: C_COMPILER__parsec-base-obj_RelWithDebInfo parsec/utils/show_help_lex.l.c || cmake_object_order_depends_target_parsec-base-obj + DEFINES = -DBUILDING_PARSEC -DYYERROR_VERBOSE -D_GNU_SOURCE + DEP_FILE = parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help_lex.l.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIC -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ + OBJECT_DIR = parsec/CMakeFiles/parsec-base-obj.dir + OBJECT_FILE_DIR = parsec/CMakeFiles/parsec-base-obj.dir/utils + TARGET_COMPILE_PDB = parsec/CMakeFiles/parsec-base-obj.dir/ + TARGET_PDB = "" + + + +############################################# +# Object library parsec-base-obj + +build parsec/parsec-base-obj: phony parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_dequeue.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_fifo.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_lifo.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_list.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_object.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_value_array.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_hash_table.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_rwlock.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_future.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/parsec_datacopy_future.c.o parsec/CMakeFiles/parsec-base-obj.dir/class/info.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/argv.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/cmd_line.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/colors.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/parsec_environ.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/installdirs.c.o 
parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_parse.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_param_cmd_line.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/mca_parse_paramfile.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/os_path.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/output.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/zone_malloc.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/atomic_external.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/debug.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/win_compat.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/keyval_lex.l.c.o parsec/CMakeFiles/parsec-base-obj.dir/utils/show_help_lex.l.c.o + + +############################################# +# Utility command for rebuild_cache + +build parsec/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build parsec/rebuild_cache: phony parsec/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build parsec/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/all + COMMAND = cd /home/joseph/parsec/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build parsec/install/strip: phony parsec/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build parsec/list_install_components: phony + + +############################################# +# Custom command for contrib/pregen_flex_bison/parsec/utils/keyval_lex.l.c + +build contrib/pregen_flex_bison/parsec/utils/keyval_lex.l.c: CUSTOM_COMMAND utils/keyval_lex.l + COMMAND = cd /home/joseph/parsec/parsec/parsec && /usr/bin/flex --noline -o/home/joseph/parsec/parsec/contrib/pregen_flex_bison/parsec/utils/keyval_lex.l.c /home/joseph/parsec/parsec/utils/keyval_lex.l + DESC = [FLEX][pregen_keyval_flex] Building scanner with flex 2.6.4 + restat = 1 + + +############################################# +# Custom command for contrib/pregen_flex_bison/parsec/utils/show_help_lex.l.c + +build contrib/pregen_flex_bison/parsec/utils/show_help_lex.l.c: CUSTOM_COMMAND utils/show_help_lex.l + COMMAND = cd /home/joseph/parsec/parsec/parsec && /usr/bin/flex --noline -o/home/joseph/parsec/parsec/contrib/pregen_flex_bison/parsec/utils/show_help_lex.l.c /home/joseph/parsec/parsec/utils/show_help_lex.l + DESC = [FLEX][pregen_show_help_flex] Building scanner with flex 2.6.4 + restat = 1 + + +############################################# +# Custom command for parsec/utils/keyval_lex.l.c + +build parsec/utils/keyval_lex.l.c: CUSTOM_COMMAND utils/keyval_lex.l + COMMAND = cd /home/joseph/parsec/parsec/parsec && /usr/bin/flex -o/home/joseph/parsec/parsec/parsec/utils/keyval_lex.l.c /home/joseph/parsec/parsec/utils/keyval_lex.l + DESC = [FLEX][keyval_flex] Building scanner with flex 2.6.4 + restat = 1 + + +############################################# +# Custom command for parsec/utils/show_help_lex.l.c + +build parsec/utils/show_help_lex.l.c: CUSTOM_COMMAND utils/show_help_lex.l + COMMAND = cd /home/joseph/parsec/parsec/parsec && /usr/bin/flex 
-o/home/joseph/parsec/parsec/parsec/utils/show_help_lex.l.c /home/joseph/parsec/parsec/utils/show_help_lex.l + DESC = [FLEX][show_help_flex] Building scanner with flex 2.6.4 + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/interfaces/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build parsec/interfaces/ptg/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/interfaces/ptg/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build parsec/interfaces/ptg/install/strip: phony parsec/interfaces/ptg/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build parsec/interfaces/ptg/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/interfaces/ptg/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build parsec/interfaces/ptg/install/local: phony parsec/interfaces/ptg/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build parsec/interfaces/ptg/CMakeFiles/install.util: CUSTOM_COMMAND parsec/interfaces/ptg/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build parsec/interfaces/ptg/install: phony parsec/interfaces/ptg/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build parsec/interfaces/ptg/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build parsec/interfaces/ptg/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build parsec/interfaces/ptg/rebuild_cache: phony parsec/interfaces/ptg/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build parsec/interfaces/ptg/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/interfaces/ptg/edit_cache: phony parsec/interfaces/ptg/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build parsec/interfaces/ptg/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build parsec/interfaces/ptg/package_source: phony parsec/interfaces/ptg/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build parsec/interfaces/ptg/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build parsec/interfaces/ptg/test: phony parsec/interfaces/ptg/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build parsec/interfaces/ptg/CMakeFiles/package.util: CUSTOM_COMMAND parsec/interfaces/ptg/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build parsec/interfaces/ptg/package: phony parsec/interfaces/ptg/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/interfaces/ptg/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for parsec_pregen_ptg + +build parsec/interfaces/ptg/ptg-compiler/parsec_pregen_ptg: phony contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.c contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.h contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.l.c + + +############################################# +# Utility command for package + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/package.util: CUSTOM_COMMAND parsec/interfaces/ptg/ptg-compiler/all + COMMAND = cd /home/joseph/parsec/parsec && 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/package: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/test: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/edit_cache: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/package_source: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target parsec-ptgpp + + +############################################# +# Order-only phony target for parsec-ptgpp + +build cmake_object_order_depends_target_parsec-ptgpp: phony || cmake_object_order_depends_target_parsec-base cmake_object_order_depends_target_parsec-base-obj parsec/interfaces/ptg/ptg-compiler/parsec.l.c parsec/interfaces/ptg/ptg-compiler/parsec.y.c parsec/interfaces/ptg/ptg-compiler/parsec.y.h + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf.c.o: C_COMPILER__parsec-ptgpp_RelWithDebInfo interfaces/ptg/ptg-compiler/jdf.c || cmake_object_order_depends_target_parsec-ptgpp + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iinterfaces/ptg/ptg-compiler -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/interfaces/ptg/ptg-compiler + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + OBJECT_FILE_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf2c.c.o: C_COMPILER__parsec-ptgpp_RelWithDebInfo interfaces/ptg/ptg-compiler/jdf2c.c || cmake_object_order_depends_target_parsec-ptgpp + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf2c.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iinterfaces/ptg/ptg-compiler -Iparsec/include -I. -Iinclude -I../ -Iparsec/interfaces/ptg/ptg-compiler + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + OBJECT_FILE_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf_unparse.c.o: C_COMPILER__parsec-ptgpp_RelWithDebInfo interfaces/ptg/ptg-compiler/jdf_unparse.c || cmake_object_order_depends_target_parsec-ptgpp + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf_unparse.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iinterfaces/ptg/ptg-compiler -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/interfaces/ptg/ptg-compiler + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + OBJECT_FILE_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.y.c.o: C_COMPILER__parsec-ptgpp_RelWithDebInfo parsec/interfaces/ptg/ptg-compiler/parsec.y.c || cmake_object_order_depends_target_parsec-ptgpp + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.y.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iinterfaces/ptg/ptg-compiler -Iparsec/include -I. -Iinclude -I../ -Iparsec/interfaces/ptg/ptg-compiler + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + OBJECT_FILE_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.l.c.o: C_COMPILER__parsec-ptgpp_RelWithDebInfo parsec/interfaces/ptg/ptg-compiler/parsec.l.c | parsec/interfaces/ptg/ptg-compiler/parsec.y.h || cmake_object_order_depends_target_parsec-ptgpp + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.l.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iinterfaces/ptg/ptg-compiler -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/interfaces/ptg/ptg-compiler + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + OBJECT_FILE_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target parsec-ptgpp + + +############################################# +# Link the executable parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp: C_EXECUTABLE_LINKER__parsec-ptgpp_RelWithDebInfo parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf.c.o parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf2c.c.o parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/jdf_unparse.c.o parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.y.c.o parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/parsec.l.c.o | parsec/libparsec-base.a || parsec/libparsec-base.a parsec/parsec-base-obj + FLAGS = -O2 -g -DNDEBUG + LINK_LIBRARIES = -Wl,-rpath,::::::::::::::::::::::::::::::::::::: -lm parsec/libparsec-base.a -latomic -pthread + OBJECT_DIR = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = parsec/interfaces/ptg/ptg-compiler/CMakeFiles/parsec-ptgpp.dir/ + TARGET_FILE = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + TARGET_PDB = parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp.pdb + + +############################################# +# Utility command for rebuild_cache + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake 
--regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/rebuild_cache: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/interfaces/ptg/ptg-compiler/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/install/strip: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build parsec/interfaces/ptg/ptg-compiler/list_install_components: phony + + +############################################# +# Utility command for install + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install.util: CUSTOM_COMMAND parsec/interfaces/ptg/ptg-compiler/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/install: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/interfaces/ptg/ptg-compiler/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build parsec/interfaces/ptg/ptg-compiler/install/local: phony parsec/interfaces/ptg/ptg-compiler/CMakeFiles/install/local.util + + +############################################# +# Custom command for contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.c + +build contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.c contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.h: CUSTOM_COMMAND interfaces/ptg/ptg-compiler/parsec.y + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /usr/bin/bison -l -d -o /home/joseph/parsec/parsec/contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.y.c /home/joseph/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.y + DESC = [BISON][pregen_parsec_yacc] Building parser with bison 3.5.1 + restat = 1 + + +############################################# +# Custom command for contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.l.c + +build contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.l.c: CUSTOM_COMMAND interfaces/ptg/ptg-compiler/parsec.l + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /usr/bin/flex --noline -o/home/joseph/parsec/parsec/contrib/pregen_flex_bison/parsec/interfaces/ptg/ptg-compiler/parsec.l.c 
/home/joseph/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.l + DESC = [FLEX][pregen_parsec_flex] Building scanner with flex 2.6.4 + restat = 1 + + +############################################# +# Custom command for parsec/interfaces/ptg/ptg-compiler/parsec.y.c + +build parsec/interfaces/ptg/ptg-compiler/parsec.y.c parsec/interfaces/ptg/ptg-compiler/parsec.y.h: CUSTOM_COMMAND interfaces/ptg/ptg-compiler/parsec.y || parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /usr/bin/bison -d -o /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.y.c /home/joseph/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.y + DESC = [BISON][parsec_yacc] Building parser with bison 3.5.1 + restat = 1 + + +############################################# +# Custom command for parsec/interfaces/ptg/ptg-compiler/parsec.l.c + +build parsec/interfaces/ptg/ptg-compiler/parsec.l.c: CUSTOM_COMMAND interfaces/ptg/ptg-compiler/parsec.l || parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler && /usr/bin/flex -o/home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.l.c /home/joseph/parsec/parsec/interfaces/ptg/ptg-compiler/parsec.l + DESC = [FLEX][parsec_flex] Building scanner with flex 2.6.4 + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build parsec/data_dist/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/data_dist/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = 
Installing the project stripped... + pool = console + restat = 1 + +build parsec/data_dist/install/strip: phony parsec/data_dist/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build parsec/data_dist/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/data_dist/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build parsec/data_dist/install/local: phony parsec/data_dist/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build parsec/data_dist/CMakeFiles/install.util: CUSTOM_COMMAND parsec/data_dist/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build parsec/data_dist/install: phony parsec/data_dist/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build parsec/data_dist/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build parsec/data_dist/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build parsec/data_dist/rebuild_cache: phony parsec/data_dist/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build parsec/data_dist/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/data_dist/edit_cache: phony parsec/data_dist/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build parsec/data_dist/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build parsec/data_dist/package_source: phony parsec/data_dist/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build parsec/data_dist/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build parsec/data_dist/test: phony parsec/data_dist/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build parsec/data_dist/CMakeFiles/package.util: CUSTOM_COMMAND parsec/data_dist/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build parsec/data_dist/package: phony parsec/data_dist/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/data_dist/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install + +build parsec/data_dist/matrix/CMakeFiles/install.util: CUSTOM_COMMAND parsec/data_dist/matrix/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build parsec/data_dist/matrix/install: phony parsec/data_dist/matrix/CMakeFiles/install.util + + +############################################# +# Utility command for install/strip + +build parsec/data_dist/matrix/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/data_dist/matrix/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/install/strip: phony parsec/data_dist/matrix/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build parsec/data_dist/matrix/list_install_components: phony + + +############################################# +# Utility command for ptgpp_parsec.reduce_col + +build parsec/data_dist/matrix/ptgpp_parsec.reduce_col: phony parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_col parsec/data_dist/matrix/reduce_col.h parsec/data_dist/matrix/reduce_col.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for edit_cache + +build parsec/data_dist/matrix/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build parsec/data_dist/matrix/edit_cache: phony parsec/data_dist/matrix/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for ptgpp_parsec.reduce_row + +build parsec/data_dist/matrix/ptgpp_parsec.reduce_row: phony parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_row parsec/data_dist/matrix/reduce_row.h parsec/data_dist/matrix/reduce_row.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_parsec.reduce + +build parsec/data_dist/matrix/ptgpp_parsec.reduce: phony parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce parsec/data_dist/matrix/reduce.h parsec/data_dist/matrix/reduce.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for rebuild_cache + +build parsec/data_dist/matrix/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build parsec/data_dist/matrix/rebuild_cache: phony parsec/data_dist/matrix/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/local + +build parsec/data_dist/matrix/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/data_dist/matrix/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/install/local: phony parsec/data_dist/matrix/CMakeFiles/install/local.util + + +############################################# +# Utility command for ptgpp_parsec.diag_band_to_rect + +build parsec/data_dist/matrix/ptgpp_parsec.diag_band_to_rect: phony parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.diag_band_to_rect parsec/data_dist/matrix/diag_band_to_rect.h parsec/data_dist/matrix/diag_band_to_rect.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_parsec.apply + +build parsec/data_dist/matrix/ptgpp_parsec.apply: phony parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.apply parsec/data_dist/matrix/apply.h parsec/data_dist/matrix/apply.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build parsec/data_dist/matrix/CMakeFiles/package.util: CUSTOM_COMMAND parsec/data_dist/matrix/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build parsec/data_dist/matrix/package: phony parsec/data_dist/matrix/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build parsec/data_dist/matrix/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/test: phony parsec/data_dist/matrix/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build parsec/data_dist/matrix/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build parsec/data_dist/matrix/package_source: phony parsec/data_dist/matrix/CMakeFiles/package_source.util + + +############################################# +# Phony custom command for parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_col + +build parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_col: phony parsec/data_dist/matrix/reduce_col.h parsec/data_dist/matrix/reduce_col.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/reduce_col.h + +build parsec/data_dist/matrix/reduce_col.h parsec/data_dist/matrix/reduce_col.c: CUSTOM_COMMAND data_dist/matrix/reduce_col.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/reduce_col.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/parsec/data_dist/matrix/reduce_col.jdf -C reduce_col.c -H reduce_col.h -f reduce_col + DESC = Generating reduce_col.h, reduce_col.c + restat = 1 + + +############################################# +# Phony custom command for parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_row + +build 
parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce_row: phony parsec/data_dist/matrix/reduce_row.h parsec/data_dist/matrix/reduce_row.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/reduce_row.h + +build parsec/data_dist/matrix/reduce_row.h parsec/data_dist/matrix/reduce_row.c: CUSTOM_COMMAND data_dist/matrix/reduce_row.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/reduce_row.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/parsec/data_dist/matrix/reduce_row.jdf -C reduce_row.c -H reduce_row.h -f reduce_row + DESC = Generating reduce_row.h, reduce_row.c + restat = 1 + + +############################################# +# Phony custom command for parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce + +build parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.reduce: phony parsec/data_dist/matrix/reduce.h parsec/data_dist/matrix/reduce.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/reduce.h + +build parsec/data_dist/matrix/reduce.h parsec/data_dist/matrix/reduce.c: CUSTOM_COMMAND data_dist/matrix/reduce.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/reduce.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/parsec/data_dist/matrix/reduce.jdf -C reduce.c -H reduce.h -f 
reduce + DESC = Generating reduce.h, reduce.c + restat = 1 + + +############################################# +# Phony custom command for parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.diag_band_to_rect + +build parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.diag_band_to_rect: phony parsec/data_dist/matrix/diag_band_to_rect.h parsec/data_dist/matrix/diag_band_to_rect.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/diag_band_to_rect.h + +build parsec/data_dist/matrix/diag_band_to_rect.h parsec/data_dist/matrix/diag_band_to_rect.c: CUSTOM_COMMAND data_dist/matrix/diag_band_to_rect.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/diag_band_to_rect.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/parsec/data_dist/matrix/diag_band_to_rect.jdf -C diag_band_to_rect.c -H diag_band_to_rect.h -f diag_band_to_rect + DESC = Generating diag_band_to_rect.h, diag_band_to_rect.c + restat = 1 + + +############################################# +# Phony custom command for parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.apply + +build parsec/data_dist/matrix/CMakeFiles/ptgpp_parsec.apply: phony parsec/data_dist/matrix/apply.h parsec/data_dist/matrix/apply.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/apply.h + +build parsec/data_dist/matrix/apply.h parsec/data_dist/matrix/apply.c: CUSTOM_COMMAND data_dist/matrix/apply.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/apply.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp 
parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/parsec/data_dist/matrix/apply.jdf -C apply.c -H apply.h -f apply + DESC = Generating apply.h, apply.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/data_dist/matrix/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for rebuild_cache + +build parsec/data_dist/matrix/redistribute/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/rebuild_cache: phony parsec/data_dist/matrix/redistribute/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for ptgpp_parsec.redistribute_reshuffle + +build parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute_reshuffle: phony parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute_reshuffle parsec/data_dist/matrix/redistribute/redistribute_reshuffle.h parsec/data_dist/matrix/redistribute/redistribute_reshuffle.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build parsec/data_dist/matrix/redistribute/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/package_source: phony parsec/data_dist/matrix/redistribute/CMakeFiles/package_source.util + + +############################################# +# Utility command for package + +build parsec/data_dist/matrix/redistribute/CMakeFiles/package.util: CUSTOM_COMMAND parsec/data_dist/matrix/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/package: phony parsec/data_dist/matrix/redistribute/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build parsec/data_dist/matrix/redistribute/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/test: phony parsec/data_dist/matrix/redistribute/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build parsec/data_dist/matrix/redistribute/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/data_dist/matrix/redistribute/edit_cache: phony parsec/data_dist/matrix/redistribute/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build parsec/data_dist/matrix/redistribute/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/data_dist/matrix/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/install/strip: phony parsec/data_dist/matrix/redistribute/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build parsec/data_dist/matrix/redistribute/list_install_components: phony + + +############################################# +# Utility command for ptgpp_parsec.redistribute + +build parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute: phony parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute parsec/data_dist/matrix/redistribute/redistribute.h parsec/data_dist/matrix/redistribute/redistribute.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for install + +build parsec/data_dist/matrix/redistribute/CMakeFiles/install.util: CUSTOM_COMMAND parsec/data_dist/matrix/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/install: phony parsec/data_dist/matrix/redistribute/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build parsec/data_dist/matrix/redistribute/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/data_dist/matrix/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build parsec/data_dist/matrix/redistribute/install/local: phony parsec/data_dist/matrix/redistribute/CMakeFiles/install/local.util + + +############################################# +# Phony custom command for parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute_reshuffle + +build parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute_reshuffle: phony parsec/data_dist/matrix/redistribute/redistribute_reshuffle.h parsec/data_dist/matrix/redistribute/redistribute_reshuffle.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/redistribute/redistribute_reshuffle.h + +build parsec/data_dist/matrix/redistribute/redistribute_reshuffle.h parsec/data_dist/matrix/redistribute/redistribute_reshuffle.c: CUSTOM_COMMAND data_dist/matrix/redistribute/redistribute_reshuffle.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/redistribute/redistribute_reshuffle.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/parsec/data_dist/matrix/redistribute/redistribute_reshuffle.jdf -C redistribute_reshuffle.c -H redistribute_reshuffle.h -f redistribute_reshuffle + DESC = Generating redistribute_reshuffle.h, redistribute_reshuffle.c + restat = 1 + + +############################################# +# Phony custom command for parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute + +build parsec/data_dist/matrix/redistribute/CMakeFiles/ptgpp_parsec.redistribute: phony parsec/data_dist/matrix/redistribute/redistribute.h parsec/data_dist/matrix/redistribute/redistribute.c || 
parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for parsec/data_dist/matrix/redistribute/redistribute.h + +build parsec/data_dist/matrix/redistribute/redistribute.h parsec/data_dist/matrix/redistribute/redistribute.c: CUSTOM_COMMAND data_dist/matrix/redistribute/redistribute.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp data_dist/matrix/redistribute/redistribute.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/parsec/data_dist/matrix/redistribute/redistribute.jdf -C redistribute.c -H redistribute.h -f redistribute + DESC = Generating redistribute.h, redistribute.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build parsec/fortran/CMakeFiles/install/strip.util: CUSTOM_COMMAND parsec/fortran/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build parsec/fortran/install/strip: phony parsec/fortran/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build parsec/fortran/CMakeFiles/install/local.util: CUSTOM_COMMAND parsec/fortran/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build parsec/fortran/install/local: phony parsec/fortran/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build parsec/fortran/CMakeFiles/install.util: CUSTOM_COMMAND parsec/fortran/all + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build parsec/fortran/install: phony parsec/fortran/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build parsec/fortran/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build parsec/fortran/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build parsec/fortran/rebuild_cache: phony parsec/fortran/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build parsec/fortran/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build parsec/fortran/edit_cache: phony parsec/fortran/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build parsec/fortran/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build parsec/fortran/package_source: phony parsec/fortran/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build parsec/fortran/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/parsec/fortran && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build parsec/fortran/test: phony parsec/fortran/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build parsec/fortran/CMakeFiles/package.util: CUSTOM_COMMAND parsec/fortran/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build parsec/fortran/package: phony parsec/fortran/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for parsec_build_tests + +build tests/parsec_build_tests: phony tests/api/compose tests/api/operator tests/api/touch_ex tests/api/touch_ex_inline tests/api/touch_exf tests/apps/all2all/a2a tests/apps/generalized_reduction/BT_reduction tests/apps/haar_tree/project tests/apps/merge_sort/merge_sort tests/apps/pingpong/bw_test tests/apps/pingpong/rtt tests/apps/stencil/testing_stencil_1D tests/class/atomics tests/class/atomics_inline tests/class/future tests/class/future_datacopy tests/class/hash tests/class/hash_inline tests/class/lifo tests/class/lifo_inline tests/class/list tests/class/list_inline tests/class/rwlock tests/class/rwlock_inline tests/collections/kcyclic tests/collections/redistribute/testing_redistribute tests/collections/redistribute/testing_redistribute_random tests/collections/reduce tests/collections/reshape/avoidable_reshape tests/collections/reshape/input_dep_reshape_single_copy tests/collections/reshape/remote_multiple_outs_same_pred_flow tests/collections/reshape/reshape tests/collections/two_dim_band/testing_band tests/dsl/dtd/dtd_test_allreduce tests/dsl/dtd/dtd_test_broadcast tests/dsl/dtd/dtd_test_data_flush tests/dsl/dtd/dtd_test_explicit_task_creation tests/dsl/dtd/dtd_test_flag_dont_track tests/dsl/dtd/dtd_test_global_id_for_dc_assumed tests/dsl/dtd/dtd_test_hierarchy tests/dsl/dtd/dtd_test_insert_task_interface tests/dsl/dtd/dtd_test_interleave_actions tests/dsl/dtd/dtd_test_multiple_handle_wait tests/dsl/dtd/dtd_test_null_as_tile tests/dsl/dtd/dtd_test_pingpong tests/dsl/dtd/dtd_test_reduce 
tests/dsl/dtd/dtd_test_task_generation tests/dsl/dtd/dtd_test_task_inserting_task tests/dsl/dtd/dtd_test_task_insertion tests/dsl/dtd/dtd_test_task_placement tests/dsl/dtd/dtd_test_template_counter tests/dsl/dtd/dtd_test_tp_enqueue_dequeue tests/dsl/dtd/dtd_test_untie tests/dsl/dtd/dtd_test_war tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/branching_ht tests/dsl/ptg/branching/branching_idxarr tests/dsl/ptg/choice/choice tests/dsl/ptg/complex_deps tests/dsl/ptg/controlgather/ctlgat tests/dsl/ptg/local-indices/local_indices tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL tests/dsl/ptg/ptgpp/write_check tests/dsl/ptg/startup tests/dsl/ptg/strange tests/dsl/ptg/user-defined-functions/udf tests/runtime/dtt_bug_replicator tests/runtime/multichain tests/runtime/scheduling/schedmicro + + +############################################# +# Utility command for package + +build tests/CMakeFiles/package.util: CUSTOM_COMMAND tests/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/package: phony tests/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build tests/test: phony tests/CMakeFiles/test.util + +# ============================================================================= +# Object build statements for OBJECT_LIBRARY target tests_common + + +############################################# +# Order-only phony target for tests_common + +build cmake_object_order_depends_target_tests_common: phony || cmake_object_order_depends_target_parsec + +build tests/CMakeFiles/tests_common.dir/tests_data.c.o: C_COMPILER__tests_common_RelWithDebInfo ../tests/tests_data.c || cmake_object_order_depends_target_tests_common + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/CMakeFiles/tests_common.dir/tests_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/CMakeFiles/tests_common.dir + OBJECT_FILE_DIR = tests/CMakeFiles/tests_common.dir + TARGET_COMPILE_PDB = tests/CMakeFiles/tests_common.dir/ + TARGET_PDB = "" + + + +############################################# +# Object library tests_common + +build tests/tests_common: phony tests/CMakeFiles/tests_common.dir/tests_data.c.o + + +############################################# +# Utility command for package_source + +build tests/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/package_source: phony tests/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/edit_cache: phony tests/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/rebuild_cache: phony tests/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/all + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/install/strip: phony tests/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/CMakeFiles/install.util: CUSTOM_COMMAND tests/all + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/install: phony tests/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/all + COMMAND = cd /home/joseph/parsec/parsec/tests && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/install/local: phony tests/CMakeFiles/install/local.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/class/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/class/all + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/class/install/local: phony tests/class/CMakeFiles/install/local.util + + +############################################# +# Utility command for package_source + +build tests/class/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/class/package_source: phony tests/class/CMakeFiles/package_source.util + + +############################################# +# Utility command for package + +build tests/class/CMakeFiles/package.util: CUSTOM_COMMAND tests/class/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/class/package: phony tests/class/CMakeFiles/package.util + +# ============================================================================= +# Object build statements for EXECUTABLE target hash_inline + + +############################################# +# Order-only phony target for hash_inline + +build cmake_object_order_depends_target_hash_inline: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/hash_inline.dir/hash.c.o: C_COMPILER__hash_inline_RelWithDebInfo ../tests/class/hash.c || cmake_object_order_depends_target_hash_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/hash_inline.dir/hash.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/hash_inline.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/hash_inline.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/hash_inline.dir/ + TARGET_PDB = tests/class/hash_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target hash_inline + + +############################################# +# Link the executable tests/class/hash_inline + +build tests/class/hash_inline: C_EXECUTABLE_LINKER__hash_inline_RelWithDebInfo tests/class/CMakeFiles/hash_inline.dir/hash.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/hash_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/hash_inline.dir/ + TARGET_FILE = tests/class/hash_inline + TARGET_PDB = tests/class/hash_inline.pdb + + +############################################# +# Utility command for install/strip + +build tests/class/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/class/all + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/class/install/strip: phony tests/class/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/class/list_install_components: phony + +# ============================================================================= +# Object build statements for EXECUTABLE target list_inline + + +############################################# +# Order-only phony target for list_inline + +build cmake_object_order_depends_target_list_inline: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/list_inline.dir/list.c.o: C_COMPILER__list_inline_RelWithDebInfo ../tests/class/list.c || cmake_object_order_depends_target_list_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/list_inline.dir/list.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/list_inline.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/list_inline.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/list_inline.dir/ + TARGET_PDB = tests/class/list_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target list_inline + + +############################################# +# Link the executable tests/class/list_inline + +build tests/class/list_inline: C_EXECUTABLE_LINKER__list_inline_RelWithDebInfo tests/class/CMakeFiles/list_inline.dir/list.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/list_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/list_inline.dir/ + TARGET_FILE = tests/class/list_inline + TARGET_PDB = tests/class/list_inline.pdb + + +############################################# +# Utility command for install + +build tests/class/CMakeFiles/install.util: CUSTOM_COMMAND tests/class/all + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/class/install: phony tests/class/CMakeFiles/install.util + +# ============================================================================= +# Object build statements for EXECUTABLE target atomics + + +############################################# +# Order-only phony target for atomics + +build cmake_object_order_depends_target_atomics: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/atomics.dir/atomics.c.o: C_COMPILER__atomics_RelWithDebInfo ../tests/class/atomics.c || cmake_object_order_depends_target_atomics + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/atomics.dir/atomics.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/atomics.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/atomics.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/atomics.dir/ + TARGET_PDB = tests/class/atomics.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target atomics + + +############################################# +# Link the executable tests/class/atomics + +build tests/class/atomics: C_EXECUTABLE_LINKER__atomics_RelWithDebInfo tests/class/CMakeFiles/atomics.dir/atomics.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so 
/usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/atomics.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/atomics.dir/ + TARGET_FILE = tests/class/atomics + TARGET_PDB = tests/class/atomics.pdb + + +############################################# +# Utility command for test + +build tests/class/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/class/test: phony tests/class/CMakeFiles/test.util + +# ============================================================================= +# Object build statements for EXECUTABLE target future + + +############################################# +# Order-only phony target for future + +build cmake_object_order_depends_target_future: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/future.dir/future.c.o: C_COMPILER__future_RelWithDebInfo ../tests/class/future.c || cmake_object_order_depends_target_future + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/future.dir/future.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/future.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/future.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/future.dir/ + TARGET_PDB = tests/class/future.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target future + + +############################################# +# Link the executable tests/class/future + +build tests/class/future: C_EXECUTABLE_LINKER__future_RelWithDebInfo tests/class/CMakeFiles/future.dir/future.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/future.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/future.dir/ + TARGET_FILE = tests/class/future + TARGET_PDB = tests/class/future.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target lifo_inline + + +############################################# +# Order-only phony target for lifo_inline + +build cmake_object_order_depends_target_lifo_inline: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/lifo_inline.dir/lifo.c.o: C_COMPILER__lifo_inline_RelWithDebInfo ../tests/class/lifo.c || cmake_object_order_depends_target_lifo_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = 
tests/class/CMakeFiles/lifo_inline.dir/lifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/lifo_inline.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/lifo_inline.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/lifo_inline.dir/ + TARGET_PDB = tests/class/lifo_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target lifo_inline + + +############################################# +# Link the executable tests/class/lifo_inline + +build tests/class/lifo_inline: C_EXECUTABLE_LINKER__lifo_inline_RelWithDebInfo tests/class/CMakeFiles/lifo_inline.dir/lifo.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/lifo_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/lifo_inline.dir/ + TARGET_FILE = tests/class/lifo_inline + TARGET_PDB = tests/class/lifo_inline.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target future_datacopy + + +############################################# +# Order-only phony target for future_datacopy + +build cmake_object_order_depends_target_future_datacopy: phony || 
cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/future_datacopy.dir/future_datacopy.c.o: C_COMPILER__future_datacopy_RelWithDebInfo ../tests/class/future_datacopy.c || cmake_object_order_depends_target_future_datacopy + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/future_datacopy.dir/future_datacopy.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/future_datacopy.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/future_datacopy.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/future_datacopy.dir/ + TARGET_PDB = tests/class/future_datacopy.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target future_datacopy + + +############################################# +# Link the executable tests/class/future_datacopy + +build tests/class/future_datacopy: C_EXECUTABLE_LINKER__future_datacopy_RelWithDebInfo tests/class/CMakeFiles/future_datacopy.dir/future_datacopy.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/future_datacopy.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/future_datacopy.dir/ + TARGET_FILE = tests/class/future_datacopy + TARGET_PDB = 
tests/class/future_datacopy.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target lifo + + +############################################# +# Order-only phony target for lifo + +build cmake_object_order_depends_target_lifo: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/lifo.dir/lifo.c.o: C_COMPILER__lifo_RelWithDebInfo ../tests/class/lifo.c || cmake_object_order_depends_target_lifo + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/lifo.dir/lifo.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/lifo.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/lifo.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/lifo.dir/ + TARGET_PDB = tests/class/lifo.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target lifo + + +############################################# +# Link the executable tests/class/lifo + +build tests/class/lifo: C_EXECUTABLE_LINKER__lifo_RelWithDebInfo tests/class/CMakeFiles/lifo.dir/lifo.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/lifo.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = 
tests/class/CMakeFiles/lifo.dir/ + TARGET_FILE = tests/class/lifo + TARGET_PDB = tests/class/lifo.pdb + + +############################################# +# Utility command for edit_cache + +build tests/class/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/class/edit_cache: phony tests/class/CMakeFiles/edit_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target rwlock_inline + + +############################################# +# Order-only phony target for rwlock_inline + +build cmake_object_order_depends_target_rwlock_inline: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/rwlock_inline.dir/rwlock.c.o: C_COMPILER__rwlock_inline_RelWithDebInfo ../tests/class/rwlock.c || cmake_object_order_depends_target_rwlock_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/rwlock_inline.dir/rwlock.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/rwlock_inline.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/rwlock_inline.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/rwlock_inline.dir/ + TARGET_PDB = tests/class/rwlock_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target rwlock_inline + + +############################################# +# Link the executable tests/class/rwlock_inline + +build tests/class/rwlock_inline: C_EXECUTABLE_LINKER__rwlock_inline_RelWithDebInfo tests/class/CMakeFiles/rwlock_inline.dir/rwlock.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/rwlock_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/rwlock_inline.dir/ + TARGET_FILE = tests/class/rwlock_inline + TARGET_PDB = tests/class/rwlock_inline.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target rwlock + + +############################################# +# Order-only phony target for rwlock + +build cmake_object_order_depends_target_rwlock: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/rwlock.dir/rwlock.c.o: C_COMPILER__rwlock_RelWithDebInfo ../tests/class/rwlock.c || cmake_object_order_depends_target_rwlock + 
DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/rwlock.dir/rwlock.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/rwlock.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/rwlock.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/rwlock.dir/ + TARGET_PDB = tests/class/rwlock.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target rwlock + + +############################################# +# Link the executable tests/class/rwlock + +build tests/class/rwlock: C_EXECUTABLE_LINKER__rwlock_RelWithDebInfo tests/class/CMakeFiles/rwlock.dir/rwlock.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/rwlock.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/rwlock.dir/ + TARGET_FILE = tests/class/rwlock + TARGET_PDB = tests/class/rwlock.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target hash + + +############################################# +# Order-only phony target for hash + +build cmake_object_order_depends_target_hash: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/hash.dir/hash.c.o: 
C_COMPILER__hash_RelWithDebInfo ../tests/class/hash.c || cmake_object_order_depends_target_hash + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/hash.dir/hash.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/hash.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/hash.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/hash.dir/ + TARGET_PDB = tests/class/hash.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target hash + + +############################################# +# Link the executable tests/class/hash + +build tests/class/hash: C_EXECUTABLE_LINKER__hash_RelWithDebInfo tests/class/CMakeFiles/hash.dir/hash.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/hash.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/hash.dir/ + TARGET_FILE = tests/class/hash + TARGET_PDB = tests/class/hash.pdb + + +############################################# +# Utility command for rebuild_cache + +build tests/class/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/class && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build 
-S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/class/rebuild_cache: phony tests/class/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target list + + +############################################# +# Order-only phony target for list + +build cmake_object_order_depends_target_list: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/list.dir/list.c.o: C_COMPILER__list_RelWithDebInfo ../tests/class/list.c || cmake_object_order_depends_target_list + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/list.dir/list.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/list.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/list.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/list.dir/ + TARGET_PDB = tests/class/list.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target list + + +############################################# +# Link the executable tests/class/list + +build tests/class/list: C_EXECUTABLE_LINKER__list_RelWithDebInfo tests/class/CMakeFiles/list.dir/list.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc 
-latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/list.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/list.dir/ + TARGET_FILE = tests/class/list + TARGET_PDB = tests/class/list.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target atomics_inline + + +############################################# +# Order-only phony target for atomics_inline + +build cmake_object_order_depends_target_atomics_inline: phony || cmake_object_order_depends_target_parsec + +build tests/class/CMakeFiles/atomics_inline.dir/atomics.c.o: C_COMPILER__atomics_inline_RelWithDebInfo ../tests/class/atomics.c || cmake_object_order_depends_target_atomics_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/class/CMakeFiles/atomics_inline.dir/atomics.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/class/CMakeFiles/atomics_inline.dir + OBJECT_FILE_DIR = tests/class/CMakeFiles/atomics_inline.dir + TARGET_COMPILE_PDB = tests/class/CMakeFiles/atomics_inline.dir/ + TARGET_PDB = tests/class/atomics_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target atomics_inline + + +############################################# +# Link the executable tests/class/atomics_inline + +build tests/class/atomics_inline: C_EXECUTABLE_LINKER__atomics_inline_RelWithDebInfo tests/class/CMakeFiles/atomics_inline.dir/atomics.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -pthread -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/class/CMakeFiles/atomics_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/class/CMakeFiles/atomics_inline.dir/ + TARGET_FILE = tests/class/atomics_inline + TARGET_PDB = tests/class/atomics_inline.pdb + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/api/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/api/all + COMMAND = cd /home/joseph/parsec/parsec/tests/api 
&& /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/api/install/local: phony tests/api/CMakeFiles/install/local.util + + +############################################# +# Utility command for install/strip + +build tests/api/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/api/all + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/api/install/strip: phony tests/api/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/api/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tests/api/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/api/rebuild_cache: phony tests/api/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tests/api/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/api/edit_cache: phony tests/api/CMakeFiles/edit_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target touch_ex + + +############################################# +# Order-only phony target for touch_ex + +build cmake_object_order_depends_target_touch_ex: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/api/ptgpp_touch_ex.touch tests/api/touch.c tests/api/touch.h + +build tests/api/CMakeFiles/touch_ex.dir/touch_ex.c.o: C_COMPILER__touch_ex_RelWithDebInfo ../tests/api/touch_ex.c || cmake_object_order_depends_target_touch_ex + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_ex.dir/touch_ex.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/api -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/touch_ex.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_ex.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex.dir/ + TARGET_PDB = tests/api/touch_ex.pdb + +build tests/api/CMakeFiles/touch_ex.dir/touch.c.o: C_COMPILER__touch_ex_RelWithDebInfo tests/api/touch.c || cmake_object_order_depends_target_touch_ex + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_ex.dir/touch.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/api -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/touch_ex.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_ex.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex.dir/ + TARGET_PDB = tests/api/touch_ex.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target touch_ex + + +############################################# +# Link the executable tests/api/touch_ex + +build tests/api/touch_ex: C_EXECUTABLE_LINKER__touch_ex_RelWithDebInfo tests/api/CMakeFiles/touch_ex.dir/touch_ex.c.o tests/api/CMakeFiles/touch_ex.dir/touch.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/api/ptgpp_touch_ex.touch parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/api/CMakeFiles/touch_ex.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex.dir/ + TARGET_FILE = tests/api/touch_ex + TARGET_PDB = tests/api/touch_ex.pdb + + +############################################# +# Utility command for package_source + +build tests/api/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/api/package_source: phony tests/api/CMakeFiles/package_source.util + + +############################################# +# Utility command for install + +build tests/api/CMakeFiles/install.util: CUSTOM_COMMAND tests/api/all + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/api/install: phony tests/api/CMakeFiles/install.util + +# ============================================================================= +# Object build statements for EXECUTABLE target touch_ex_inline + + +############################################# +# Order-only phony target for touch_ex_inline + +build cmake_object_order_depends_target_touch_ex_inline: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/api/ptgpp_touch_ex_inline.touch tests/api/touch.c tests/api/touch.h + +build tests/api/CMakeFiles/touch_ex_inline.dir/touch_ex.c.o: C_COMPILER__touch_ex_inline_RelWithDebInfo ../tests/api/touch_ex.c || cmake_object_order_depends_target_touch_ex_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_ex_inline.dir/touch_ex.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/api -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/touch_ex_inline.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_ex_inline.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex_inline.dir/ + TARGET_PDB = tests/api/touch_ex_inline.pdb + +build tests/api/CMakeFiles/touch_ex_inline.dir/touch.c.o: C_COMPILER__touch_ex_inline_RelWithDebInfo tests/api/touch.c || cmake_object_order_depends_target_touch_ex_inline + DEFINES = -DBUILDING_PARSEC -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_ex_inline.dir/touch.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/api -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/touch_ex_inline.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_ex_inline.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex_inline.dir/ + TARGET_PDB = tests/api/touch_ex_inline.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target touch_ex_inline + + +############################################# +# Link the executable tests/api/touch_ex_inline + +build tests/api/touch_ex_inline: C_EXECUTABLE_LINKER__touch_ex_inline_RelWithDebInfo tests/api/CMakeFiles/touch_ex_inline.dir/touch_ex.c.o tests/api/CMakeFiles/touch_ex_inline.dir/touch.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/api/ptgpp_touch_ex_inline.touch parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm 
-lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/api/CMakeFiles/touch_ex_inline.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_ex_inline.dir/ + TARGET_FILE = tests/api/touch_ex_inline + TARGET_PDB = tests/api/touch_ex_inline.pdb + + +############################################# +# Utility command for ptgpp_touch_ex_inline.touch + +build tests/api/ptgpp_touch_ex_inline.touch: phony tests/api/CMakeFiles/ptgpp_touch_ex_inline.touch tests/api/touch.h tests/api/touch.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target operator + + +############################################# +# Order-only phony target for operator + +build cmake_object_order_depends_target_operator: phony || cmake_object_order_depends_target_parsec + +build tests/api/CMakeFiles/operator.dir/operator.c.o: C_COMPILER__operator_RelWithDebInfo ../tests/api/operator.c || cmake_object_order_depends_target_operator + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/operator.dir/operator.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/operator.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/operator.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/operator.dir/ + TARGET_PDB = tests/api/operator.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target operator + + +############################################# +# Link the executable tests/api/operator + +build tests/api/operator: C_EXECUTABLE_LINKER__operator_RelWithDebInfo tests/api/CMakeFiles/operator.dir/operator.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/api/CMakeFiles/operator.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/api/CMakeFiles/operator.dir/ + TARGET_FILE = tests/api/operator + TARGET_PDB = tests/api/operator.pdb + + +############################################# +# Utility command for package + +build tests/api/CMakeFiles/package.util: CUSTOM_COMMAND tests/api/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/api/package: phony tests/api/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/api/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/api/test: phony tests/api/CMakeFiles/test.util + + +############################################# +# Utility command for ptgpp_touch_ex.touch + +build tests/api/ptgpp_touch_ex.touch: phony tests/api/CMakeFiles/ptgpp_touch_ex.touch tests/api/touch.h tests/api/touch.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target touch_exf + + +############################################# +# Order-only phony target for touch_exf + +build cmake_object_order_depends_target_touch_exf: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/api/ptgpp_touch_exf.touch tests/api/touch.c tests/api/touch.h + +build tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90-pp.f90 | tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o.ddi: Fortran_PREPROCESS_SCAN__touch_exf_RelWithDebInfo ../tests/api/touch_exf.F90 || cmake_object_order_depends_target_touch_exf + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90-pp.f90.d + DYNDEP_INTERMEDIATE_FILE = tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o.ddi + FLAGS = -O2 -g -DNDEBUG -Jparsec/include/fortran -fPIE -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/api -Iparsec/include/fortran -I/usr/include -I/usr/local/include -I/usr/local/lib + OBJ_FILE = tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o + +build tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o: Fortran_COMPILER__touch_exf_RelWithDebInfo tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90-pp.f90 || tests/api/CMakeFiles/touch_exf.dir/Fortran.dd + FLAGS = -O2 -g -DNDEBUG -Jparsec/include/fortran -fPIE -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -fpreprocessed + INCLUDES = -I../tests/api -Iparsec/include -I. -Iinclude -I../ -Itests/api -Iparsec/include/fortran -I/usr/include -I/usr/local/include -I/usr/local/lib + OBJECT_DIR = tests/api/CMakeFiles/touch_exf.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_exf.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_exf.dir/ + TARGET_PDB = tests/api/touch_exf.pdb + dyndep = tests/api/CMakeFiles/touch_exf.dir/Fortran.dd + +build tests/api/CMakeFiles/touch_exf.dir/touch.c.o: C_COMPILER__touch_exf_RelWithDebInfo tests/api/touch.c || cmake_object_order_depends_target_touch_exf + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/touch_exf.dir/touch.c.o.d + FLAGS = -O2 -g -DNDEBUG -fPIE -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/api -Iparsec/include/fortran -I/usr/local/lib + OBJECT_DIR = tests/api/CMakeFiles/touch_exf.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/touch_exf.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_exf.dir/ + TARGET_PDB = tests/api/touch_exf.pdb + +build tests/api/CMakeFiles/touch_exf.dir/Fortran.dd: Fortran_DYNDEP__touch_exf_RelWithDebInfo tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o.ddi || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/api/ptgpp_touch_exf.touch + + +# ============================================================================= +# Link build statements for EXECUTABLE target touch_exf + + +############################################# +# Link the executable tests/api/touch_exf + +build tests/api/touch_exf: Fortran_EXECUTABLE_LINKER__touch_exf_RelWithDebInfo tests/api/CMakeFiles/touch_exf.dir/touch_exf.F90.o tests/api/CMakeFiles/touch_exf.dir/touch.c.o | parsec/libparsec.so.4.0.0 /usr/lib/x86_64-linux-gnu/libhwloc.so /usr/local/lib/libmpi_usempif08.so /usr/local/lib/libmpi_usempi_ignore_tkr.so /usr/local/lib/libmpi_mpifh.so /usr/local/lib/libmpi.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/api/ptgpp_touch_exf.touch parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic /usr/local/lib/libmpi_usempif08.so /usr/local/lib/libmpi_usempi_ignore_tkr.so /usr/local/lib/libmpi_mpifh.so /usr/local/lib/libmpi.so + OBJECT_DIR = tests/api/CMakeFiles/touch_exf.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/api/CMakeFiles/touch_exf.dir/ + 
TARGET_FILE = tests/api/touch_exf + TARGET_PDB = tests/api/touch_exf.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target compose + + +############################################# +# Order-only phony target for compose + +build cmake_object_order_depends_target_compose: phony || cmake_object_order_depends_target_parsec + +build tests/api/CMakeFiles/compose.dir/compose.c.o: C_COMPILER__compose_RelWithDebInfo ../tests/api/compose.c || cmake_object_order_depends_target_compose + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/api/CMakeFiles/compose.dir/compose.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/api/CMakeFiles/compose.dir + OBJECT_FILE_DIR = tests/api/CMakeFiles/compose.dir + TARGET_COMPILE_PDB = tests/api/CMakeFiles/compose.dir/ + TARGET_PDB = tests/api/compose.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target compose + + +############################################# +# Link the executable tests/api/compose + +build tests/api/compose: C_EXECUTABLE_LINKER__compose_RelWithDebInfo tests/api/CMakeFiles/compose.dir/compose.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = 
tests/api/CMakeFiles/compose.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/api/CMakeFiles/compose.dir/ + TARGET_FILE = tests/api/compose + TARGET_PDB = tests/api/compose.pdb + + +############################################# +# Utility command for ptgpp_touch_exf.touch + +build tests/api/ptgpp_touch_exf.touch: phony tests/api/CMakeFiles/ptgpp_touch_exf.touch tests/api/touch.h tests/api/touch.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Custom command for tests/api/touch.h + +build tests/api/touch.h tests/api/touch.c: CUSTOM_COMMAND ../tests/api/touch.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/api/touch.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/api && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/api/touch.jdf -C touch.c -H touch.h -f touch + DESC = Generating touch.h, touch.c + restat = 1 + + +############################################# +# Phony custom command for tests/api/CMakeFiles/ptgpp_touch_ex_inline.touch + +build tests/api/CMakeFiles/ptgpp_touch_ex_inline.touch: phony tests/api/touch.h tests/api/touch.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/api/CMakeFiles/ptgpp_touch_ex.touch + +build tests/api/CMakeFiles/ptgpp_touch_ex.touch: phony tests/api/touch.h tests/api/touch.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/api/CMakeFiles/ptgpp_touch_exf.touch + +build tests/api/CMakeFiles/ptgpp_touch_exf.touch: phony tests/api/touch.h tests/api/touch.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a 
parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install + +build tests/runtime/CMakeFiles/install.util: CUSTOM_COMMAND tests/runtime/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/runtime/install: phony tests/runtime/CMakeFiles/install.util + +# ============================================================================= +# Object build statements for EXECUTABLE target multichain + + +############################################# +# Order-only phony target for multichain + +build cmake_object_order_depends_target_multichain: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/runtime/multichain.c tests/runtime/multichain.h tests/runtime/ptgpp_multichain.multichain + +build tests/runtime/CMakeFiles/multichain.dir/multichain.c.o: C_COMPILER__multichain_RelWithDebInfo tests/runtime/multichain.c || cmake_object_order_depends_target_multichain + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/CMakeFiles/multichain.dir/multichain.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/runtime -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/CMakeFiles/multichain.dir + OBJECT_FILE_DIR = tests/runtime/CMakeFiles/multichain.dir + TARGET_COMPILE_PDB = tests/runtime/CMakeFiles/multichain.dir/ + TARGET_PDB = tests/runtime/multichain.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target multichain + + +############################################# +# Link the executable tests/runtime/multichain + +build tests/runtime/multichain: C_EXECUTABLE_LINKER__multichain_RelWithDebInfo tests/runtime/CMakeFiles/multichain.dir/multichain.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/runtime/ptgpp_multichain.multichain parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/runtime/CMakeFiles/multichain.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/runtime/CMakeFiles/multichain.dir/ + TARGET_FILE = tests/runtime/multichain + TARGET_PDB = tests/runtime/multichain.pdb + + +############################################# +# Utility command for ptgpp_multichain.multichain + +build tests/runtime/ptgpp_multichain.multichain: phony tests/runtime/CMakeFiles/ptgpp_multichain.multichain tests/runtime/multichain.h tests/runtime/multichain.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# 
Object build statements for EXECUTABLE target dtt_bug_replicator + + +############################################# +# Order-only phony target for dtt_bug_replicator + +build cmake_object_order_depends_target_dtt_bug_replicator: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/runtime/dtt_bug_replicator.c tests/runtime/dtt_bug_replicator.h tests/runtime/ptgpp_dtt_bug_replicator.dtt_bug_replicator + +build tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator_ex.c.o: C_COMPILER__dtt_bug_replicator_RelWithDebInfo ../tests/runtime/dtt_bug_replicator_ex.c || cmake_object_order_depends_target_dtt_bug_replicator + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator_ex.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/runtime -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/CMakeFiles/dtt_bug_replicator.dir + OBJECT_FILE_DIR = tests/runtime/CMakeFiles/dtt_bug_replicator.dir + TARGET_COMPILE_PDB = tests/runtime/CMakeFiles/dtt_bug_replicator.dir/ + TARGET_PDB = tests/runtime/dtt_bug_replicator.pdb + +build tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator.c.o: C_COMPILER__dtt_bug_replicator_RelWithDebInfo tests/runtime/dtt_bug_replicator.c || cmake_object_order_depends_target_dtt_bug_replicator + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/runtime -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/CMakeFiles/dtt_bug_replicator.dir + OBJECT_FILE_DIR = tests/runtime/CMakeFiles/dtt_bug_replicator.dir + TARGET_COMPILE_PDB = tests/runtime/CMakeFiles/dtt_bug_replicator.dir/ + TARGET_PDB = tests/runtime/dtt_bug_replicator.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtt_bug_replicator + + +############################################# +# Link the executable tests/runtime/dtt_bug_replicator + +build tests/runtime/dtt_bug_replicator: C_EXECUTABLE_LINKER__dtt_bug_replicator_RelWithDebInfo tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator_ex.c.o tests/runtime/CMakeFiles/dtt_bug_replicator.dir/dtt_bug_replicator.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/runtime/ptgpp_dtt_bug_replicator.dtt_bug_replicator parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/runtime/CMakeFiles/dtt_bug_replicator.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/runtime/CMakeFiles/dtt_bug_replicator.dir/ + TARGET_FILE = tests/runtime/dtt_bug_replicator + TARGET_PDB = tests/runtime/dtt_bug_replicator.pdb + + +############################################# +# Utility command for rebuild_cache + +build tests/runtime/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd 
/home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/runtime/rebuild_cache: phony tests/runtime/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/local + +build tests/runtime/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/runtime/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/runtime/install/local: phony tests/runtime/CMakeFiles/install/local.util + + +############################################# +# Utility command for ptgpp_dtt_bug_replicator.dtt_bug_replicator + +build tests/runtime/ptgpp_dtt_bug_replicator.dtt_bug_replicator: phony tests/runtime/CMakeFiles/ptgpp_dtt_bug_replicator.dtt_bug_replicator tests/runtime/dtt_bug_replicator.h tests/runtime/dtt_bug_replicator.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/runtime/CMakeFiles/package.util: CUSTOM_COMMAND tests/runtime/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/runtime/package: phony tests/runtime/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/runtime/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/runtime/test: phony tests/runtime/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/runtime/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/runtime/package_source: phony tests/runtime/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/runtime/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/runtime/edit_cache: phony tests/runtime/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/runtime/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/runtime/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/runtime/install/strip: phony tests/runtime/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/runtime/list_install_components: phony + + +############################################# +# Custom command for tests/runtime/multichain.h + +build tests/runtime/multichain.h tests/runtime/multichain.c: CUSTOM_COMMAND ../tests/runtime/multichain.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/runtime/multichain.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/runtime/multichain.jdf -C multichain.c -H multichain.h -f multichain + DESC = Generating multichain.h, multichain.c + restat = 1 + + +############################################# +# Phony custom command for tests/runtime/CMakeFiles/ptgpp_multichain.multichain + +build tests/runtime/CMakeFiles/ptgpp_multichain.multichain: phony tests/runtime/multichain.h tests/runtime/multichain.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/runtime/dtt_bug_replicator.h + +build tests/runtime/dtt_bug_replicator.h tests/runtime/dtt_bug_replicator.c: CUSTOM_COMMAND ../tests/runtime/dtt_bug_replicator.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/runtime/dtt_bug_replicator.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/runtime/dtt_bug_replicator.jdf -C dtt_bug_replicator.c -H dtt_bug_replicator.h -f 
dtt_bug_replicator + DESC = Generating dtt_bug_replicator.h, dtt_bug_replicator.c + restat = 1 + + +############################################# +# Phony custom command for tests/runtime/CMakeFiles/ptgpp_dtt_bug_replicator.dtt_bug_replicator + +build tests/runtime/CMakeFiles/ptgpp_dtt_bug_replicator.dtt_bug_replicator: phony tests/runtime/dtt_bug_replicator.h tests/runtime/dtt_bug_replicator.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/runtime/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tests/runtime/scheduling/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/runtime/scheduling/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/runtime/scheduling/install/strip: phony tests/runtime/scheduling/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/runtime/scheduling/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/runtime/scheduling/CMakeFiles/install.util: CUSTOM_COMMAND tests/runtime/scheduling/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/runtime/scheduling/install: phony tests/runtime/scheduling/CMakeFiles/install.util + + +############################################# +# Utility command for ptgpp_schedmicro.ep + +build tests/runtime/scheduling/ptgpp_schedmicro.ep: phony tests/runtime/scheduling/CMakeFiles/ptgpp_schedmicro.ep tests/runtime/scheduling/ep.h tests/runtime/scheduling/ep.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/runtime/scheduling/CMakeFiles/package.util: CUSTOM_COMMAND tests/runtime/scheduling/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/runtime/scheduling/package: phony tests/runtime/scheduling/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/runtime/scheduling/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/runtime/scheduling/test: phony tests/runtime/scheduling/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/runtime/scheduling/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/runtime/scheduling/package_source: phony tests/runtime/scheduling/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/runtime/scheduling/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/runtime/scheduling/edit_cache: phony tests/runtime/scheduling/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/runtime/scheduling/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/runtime/scheduling/rebuild_cache: phony tests/runtime/scheduling/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target schedmicro + + +############################################# +# Order-only phony target for schedmicro + +build cmake_object_order_depends_target_schedmicro: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/runtime/scheduling/ep.c tests/runtime/scheduling/ep.h tests/runtime/scheduling/ptgpp_schedmicro.ep + +build tests/runtime/scheduling/CMakeFiles/schedmicro.dir/main.c.o: C_COMPILER__schedmicro_RelWithDebInfo ../tests/runtime/scheduling/main.c || cmake_object_order_depends_target_schedmicro + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/runtime/scheduling -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + OBJECT_FILE_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + TARGET_COMPILE_PDB = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ + TARGET_PDB = tests/runtime/scheduling/schedmicro.pdb + +build tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep_wrapper.c.o: C_COMPILER__schedmicro_RelWithDebInfo ../tests/runtime/scheduling/ep_wrapper.c || cmake_object_order_depends_target_schedmicro + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/runtime/scheduling -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + OBJECT_FILE_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + TARGET_COMPILE_PDB = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ + TARGET_PDB = tests/runtime/scheduling/schedmicro.pdb + +build tests/runtime/scheduling/CMakeFiles/schedmicro.dir/schedmicro_data.c.o: C_COMPILER__schedmicro_RelWithDebInfo ../tests/runtime/scheduling/schedmicro_data.c || cmake_object_order_depends_target_schedmicro + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/schedmicro_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/runtime/scheduling -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + OBJECT_FILE_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + TARGET_COMPILE_PDB = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ + TARGET_PDB = tests/runtime/scheduling/schedmicro.pdb + +build tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep.c.o: C_COMPILER__schedmicro_RelWithDebInfo tests/runtime/scheduling/ep.c || cmake_object_order_depends_target_schedmicro + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/runtime/scheduling -Iparsec/include/fortran + OBJECT_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + OBJECT_FILE_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + TARGET_COMPILE_PDB = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ + TARGET_PDB = tests/runtime/scheduling/schedmicro.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target schedmicro + + +############################################# +# Link the executable tests/runtime/scheduling/schedmicro + +build tests/runtime/scheduling/schedmicro: C_EXECUTABLE_LINKER__schedmicro_RelWithDebInfo tests/runtime/scheduling/CMakeFiles/schedmicro.dir/main.c.o tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep_wrapper.c.o tests/runtime/scheduling/CMakeFiles/schedmicro.dir/schedmicro_data.c.o tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ep.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/runtime/scheduling/ptgpp_schedmicro.ep parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/runtime/scheduling/CMakeFiles/schedmicro.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/runtime/scheduling/CMakeFiles/schedmicro.dir/ + TARGET_FILE = tests/runtime/scheduling/schedmicro + TARGET_PDB = tests/runtime/scheduling/schedmicro.pdb + + +############################################# +# Utility command for 
install/local + +build tests/runtime/scheduling/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/runtime/scheduling/all + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/runtime/scheduling/install/local: phony tests/runtime/scheduling/CMakeFiles/install/local.util + + +############################################# +# Phony custom command for tests/runtime/scheduling/CMakeFiles/ptgpp_schedmicro.ep + +build tests/runtime/scheduling/CMakeFiles/ptgpp_schedmicro.ep: phony tests/runtime/scheduling/ep.h tests/runtime/scheduling/ep.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/runtime/scheduling/ep.h + +build tests/runtime/scheduling/ep.h tests/runtime/scheduling/ep.c: CUSTOM_COMMAND ../tests/runtime/scheduling/ep.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/runtime/scheduling/ep.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/runtime/scheduling && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/runtime/scheduling/ep.jdf -C ep.c -H ep.h -f ep + DESC = Generating ep.h, ep.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/all + COMMAND = cd 
/home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/install/local: phony tests/dsl/ptg/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tests/dsl/ptg/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/dsl/ptg/install: phony tests/dsl/ptg/CMakeFiles/install.util + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/rebuild_cache: phony tests/dsl/ptg/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target strange + + +############################################# +# Order-only phony target for strange + +build cmake_object_order_depends_target_strange: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp_strange.strange tests/dsl/ptg/strange.c tests/dsl/ptg/strange.h + +build tests/dsl/ptg/CMakeFiles/strange.dir/strange.c.o: C_COMPILER__strange_RelWithDebInfo tests/dsl/ptg/strange.c || cmake_object_order_depends_target_strange + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/CMakeFiles/strange.dir/strange.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/strange.dir + OBJECT_FILE_DIR = tests/dsl/ptg/CMakeFiles/strange.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/strange.dir/ + TARGET_PDB = tests/dsl/ptg/strange.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target strange + + +############################################# +# Link the executable tests/dsl/ptg/strange + +build tests/dsl/ptg/strange: C_EXECUTABLE_LINKER__strange_RelWithDebInfo tests/dsl/ptg/CMakeFiles/strange.dir/strange.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp_strange.strange parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib 
parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/strange.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/strange.dir/ + TARGET_FILE = tests/dsl/ptg/strange + TARGET_PDB = tests/dsl/ptg/strange.pdb + + +############################################# +# Utility command for ptgpp_strange.strange + +build tests/dsl/ptg/ptgpp_strange.strange: phony tests/dsl/ptg/CMakeFiles/ptgpp_strange.strange tests/dsl/ptg/strange.h tests/dsl/ptg/strange.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target startup + + +############################################# +# Order-only phony target for startup + +build cmake_object_order_depends_target_startup: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp_startup.startup tests/dsl/ptg/startup.c tests/dsl/ptg/startup.h + +build tests/dsl/ptg/CMakeFiles/startup.dir/startup.c.o: C_COMPILER__startup_RelWithDebInfo tests/dsl/ptg/startup.c || cmake_object_order_depends_target_startup + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/CMakeFiles/startup.dir/startup.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/startup.dir + OBJECT_FILE_DIR = tests/dsl/ptg/CMakeFiles/startup.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/startup.dir/ + TARGET_PDB = tests/dsl/ptg/startup.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target startup + + +############################################# +# Link the executable tests/dsl/ptg/startup + +build tests/dsl/ptg/startup: C_EXECUTABLE_LINKER__startup_RelWithDebInfo tests/dsl/ptg/CMakeFiles/startup.dir/startup.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp_startup.startup parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/startup.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/startup.dir/ + TARGET_FILE = tests/dsl/ptg/startup + TARGET_PDB = tests/dsl/ptg/startup.pdb + + +############################################# +# Utility command for ptgpp_complex_deps.complex_deps + +build tests/dsl/ptg/ptgpp_complex_deps.complex_deps: phony tests/dsl/ptg/CMakeFiles/ptgpp_complex_deps.complex_deps tests/dsl/ptg/complex_deps.h tests/dsl/ptg/complex_deps.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for edit_cache + +build 
tests/dsl/ptg/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/ptg/edit_cache: phony tests/dsl/ptg/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for ptgpp_startup.startup + +build tests/dsl/ptg/ptgpp_startup.startup: phony tests/dsl/ptg/CMakeFiles/ptgpp_startup.startup tests/dsl/ptg/startup.h tests/dsl/ptg/startup.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/ptg/package_source: phony tests/dsl/ptg/CMakeFiles/package_source.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/install/strip: phony tests/dsl/ptg/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/list_install_components: phony + +# ============================================================================= +# Object build statements for EXECUTABLE target complex_deps + + +############################################# +# Order-only phony target for complex_deps + +build cmake_object_order_depends_target_complex_deps: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/complex_deps.c tests/dsl/ptg/complex_deps.h tests/dsl/ptg/ptgpp_complex_deps.complex_deps + +build tests/dsl/ptg/CMakeFiles/complex_deps.dir/complex_deps.c.o: C_COMPILER__complex_deps_RelWithDebInfo tests/dsl/ptg/complex_deps.c || cmake_object_order_depends_target_complex_deps + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/CMakeFiles/complex_deps.dir/complex_deps.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/complex_deps.dir + OBJECT_FILE_DIR = tests/dsl/ptg/CMakeFiles/complex_deps.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/complex_deps.dir/ + TARGET_PDB = tests/dsl/ptg/complex_deps.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target complex_deps + + +############################################# +# Link the executable tests/dsl/ptg/complex_deps + +build tests/dsl/ptg/complex_deps: C_EXECUTABLE_LINKER__complex_deps_RelWithDebInfo tests/dsl/ptg/CMakeFiles/complex_deps.dir/complex_deps.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp_complex_deps.complex_deps parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/CMakeFiles/complex_deps.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/CMakeFiles/complex_deps.dir/ + TARGET_FILE = tests/dsl/ptg/complex_deps + TARGET_PDB = tests/dsl/ptg/complex_deps.pdb + + +############################################# +# Utility command for package + +build tests/dsl/ptg/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/package: phony tests/dsl/ptg/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/test: phony tests/dsl/ptg/CMakeFiles/test.util + + +############################################# +# Custom command for tests/dsl/ptg/strange.h + +build tests/dsl/ptg/strange.h tests/dsl/ptg/strange.c: CUSTOM_COMMAND ../tests/dsl/ptg/strange.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/strange.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/strange.jdf -C strange.c -H strange.h -f strange + DESC = Generating strange.h, strange.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/CMakeFiles/ptgpp_strange.strange + +build tests/dsl/ptg/CMakeFiles/ptgpp_strange.strange: phony tests/dsl/ptg/strange.h tests/dsl/ptg/strange.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/startup.h + +build tests/dsl/ptg/startup.h tests/dsl/ptg/startup.c: CUSTOM_COMMAND ../tests/dsl/ptg/startup.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/startup.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && 
/home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/startup.jdf -C startup.c -H startup.h -f startup + DESC = Generating startup.h, startup.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/CMakeFiles/ptgpp_complex_deps.complex_deps + +build tests/dsl/ptg/CMakeFiles/ptgpp_complex_deps.complex_deps: phony tests/dsl/ptg/complex_deps.h tests/dsl/ptg/complex_deps.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/complex_deps.h + +build tests/dsl/ptg/complex_deps.h tests/dsl/ptg/complex_deps.c: CUSTOM_COMMAND ../tests/dsl/ptg/complex_deps.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/complex_deps.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/complex_deps.jdf -C complex_deps.c -H complex_deps.h -f complex_deps + DESC = Generating complex_deps.h, complex_deps.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/CMakeFiles/ptgpp_startup.startup + +build tests/dsl/ptg/CMakeFiles/ptgpp_startup.startup: phony tests/dsl/ptg/startup.h tests/dsl/ptg/startup.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build 
tests/dsl/ptg/ptgpp/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/ptgpp/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/install/strip: phony tests/dsl/ptg/ptgpp/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/ptgpp/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/ptgpp/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/install/local: phony tests/dsl/ptg/ptgpp/CMakeFiles/install/local.util + + +############################################# +# Utility command for ptgpp_must_fail_too_many_in_deps.too_many_in_deps + +build tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_in_deps.too_many_in_deps: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_in_deps.too_many_in_deps tests/dsl/ptg/ptgpp/too_many_in_deps.h tests/dsl/ptg/ptgpp/too_many_in_deps.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_must_fail_too_many_read_flows.too_many_read_flows + +build tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_read_flows.too_many_read_flows: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_read_flows.too_many_read_flows tests/dsl/ptg/ptgpp/too_many_read_flows.h tests/dsl/ptg/ptgpp/too_many_read_flows.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target 
jdf_forward_READ_NULL + + +############################################# +# Order-only phony target for jdf_forward_READ_NULL + +build cmake_object_order_depends_target_jdf_forward_READ_NULL: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/forward_READ_NULL.c tests/dsl/ptg/ptgpp/forward_READ_NULL.h tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL + +build tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir/forward_READ_NULL.c.o: C_COMPILER__jdf_forward_READ_NULL_RelWithDebInfo tests/dsl/ptg/ptgpp/forward_READ_NULL.c || cmake_object_order_depends_target_jdf_forward_READ_NULL + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir/forward_READ_NULL.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target jdf_forward_READ_NULL + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL + +build tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL: C_EXECUTABLE_LINKER__jdf_forward_READ_NULL_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir/forward_READ_NULL.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL parsec/libparsec.so + FLAGS = -O2 -g 
-DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_READ_NULL.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL + TARGET_PDB = tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL.pdb + + +############################################# +# Utility command for ptgpp_must_fail_too_many_local_vars.too_many_local_vars + +build tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_local_vars.too_many_local_vars: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_local_vars.too_many_local_vars tests/dsl/ptg/ptgpp/too_many_local_vars.h tests/dsl/ptg/ptgpp/too_many_local_vars.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target jdf_forward_RW_NULL + + +############################################# +# Order-only phony target for jdf_forward_RW_NULL + +build cmake_object_order_depends_target_jdf_forward_RW_NULL: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/forward_RW_NULL.c tests/dsl/ptg/ptgpp/forward_RW_NULL.h tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL + +build tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir/forward_RW_NULL.c.o: C_COMPILER__jdf_forward_RW_NULL_RelWithDebInfo tests/dsl/ptg/ptgpp/forward_RW_NULL.c || cmake_object_order_depends_target_jdf_forward_RW_NULL + DEFINES = 
-D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir/forward_RW_NULL.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target jdf_forward_RW_NULL + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL + +build tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL: C_EXECUTABLE_LINKER__jdf_forward_RW_NULL_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir/forward_RW_NULL.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/jdf_forward_RW_NULL.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL + TARGET_PDB = 
tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL.pdb + + +############################################# +# Utility command for test + +build tests/dsl/ptg/ptgpp/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/test: phony tests/dsl/ptg/ptgpp/CMakeFiles/test.util + + +############################################# +# Utility command for ptgpp_write_check.write_check + +build tests/dsl/ptg/ptgpp/ptgpp_write_check.write_check: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_write_check.write_check tests/dsl/ptg/ptgpp/write_check.h tests/dsl/ptg/ptgpp/write_check.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for install + +build tests/dsl/ptg/ptgpp/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/ptgpp/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/install: phony tests/dsl/ptg/ptgpp/CMakeFiles/install.util + +# ============================================================================= +# Object build statements for EXECUTABLE target must_fail_too_many_out_deps + + +############################################# +# Order-only phony target for must_fail_too_many_out_deps + +build cmake_object_order_depends_target_must_fail_too_many_out_deps: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_out_deps.too_many_out_deps tests/dsl/ptg/ptgpp/too_many_out_deps.c tests/dsl/ptg/ptgpp/too_many_out_deps.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir/too_many_out_deps.c.o: C_COMPILER__must_fail_too_many_out_deps_RelWithDebInfo tests/dsl/ptg/ptgpp/too_many_out_deps.c || cmake_object_order_depends_target_must_fail_too_many_out_deps + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir/too_many_out_deps.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target must_fail_too_many_out_deps + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps + +build tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps: C_EXECUTABLE_LINKER__must_fail_too_many_out_deps_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir/too_many_out_deps.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_out_deps.too_many_out_deps parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_out_deps.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps.pdb + + +############################################# +# Utility command for 
ptgpp_must_fail_too_many_out_deps.too_many_out_deps + +build tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_out_deps.too_many_out_deps: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_out_deps.too_many_out_deps tests/dsl/ptg/ptgpp/too_many_out_deps.h tests/dsl/ptg/ptgpp/too_many_out_deps.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target must_fail_too_many_in_deps + + +############################################# +# Order-only phony target for must_fail_too_many_in_deps + +build cmake_object_order_depends_target_must_fail_too_many_in_deps: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_in_deps.too_many_in_deps tests/dsl/ptg/ptgpp/too_many_in_deps.c tests/dsl/ptg/ptgpp/too_many_in_deps.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir/too_many_in_deps.c.o: C_COMPILER__must_fail_too_many_in_deps_RelWithDebInfo tests/dsl/ptg/ptgpp/too_many_in_deps.c || cmake_object_order_depends_target_must_fail_too_many_in_deps + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir/too_many_in_deps.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target must_fail_too_many_in_deps + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps + +build tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps: C_EXECUTABLE_LINKER__must_fail_too_many_in_deps_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir/too_many_in_deps.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_in_deps.too_many_in_deps parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_in_deps.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps.pdb + + +############################################# +# Utility command for 
ptgpp_must_fail_too_many_write_flows.too_many_write_flows + +build tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_write_flows.too_many_write_flows: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_write_flows.too_many_write_flows tests/dsl/ptg/ptgpp/too_many_write_flows.h tests/dsl/ptg/ptgpp/too_many_write_flows.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target write_check + + +############################################# +# Order-only phony target for write_check + +build cmake_object_order_depends_target_write_check: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_write_check.write_check tests/dsl/ptg/ptgpp/write_check.c tests/dsl/ptg/ptgpp/write_check.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/vector.c.o: C_COMPILER__write_check_RelWithDebInfo ../tests/dsl/ptg/ptgpp/vector.c || cmake_object_order_depends_target_write_check + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/vector.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/write_check.pdb + +build tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/write_check.c.o: C_COMPILER__write_check_RelWithDebInfo tests/dsl/ptg/ptgpp/write_check.c || cmake_object_order_depends_target_write_check + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/write_check.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/write_check.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target write_check + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/write_check + +build tests/dsl/ptg/ptgpp/write_check: C_EXECUTABLE_LINKER__write_check_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/vector.c.o tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/write_check.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_write_check.write_check parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 
-L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/write_check.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/write_check + TARGET_PDB = tests/dsl/ptg/ptgpp/write_check.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target must_fail_too_many_local_vars + + +############################################# +# Order-only phony target for must_fail_too_many_local_vars + +build cmake_object_order_depends_target_must_fail_too_many_local_vars: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_local_vars.too_many_local_vars tests/dsl/ptg/ptgpp/too_many_local_vars.c tests/dsl/ptg/ptgpp/too_many_local_vars.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir/too_many_local_vars.c.o: C_COMPILER__must_fail_too_many_local_vars_RelWithDebInfo tests/dsl/ptg/ptgpp/too_many_local_vars.c || cmake_object_order_depends_target_must_fail_too_many_local_vars + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir/too_many_local_vars.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target must_fail_too_many_local_vars + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars + +build tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars: C_EXECUTABLE_LINKER__must_fail_too_many_local_vars_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir/too_many_local_vars.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_local_vars.too_many_local_vars parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_local_vars.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars.pdb + +# 
============================================================================= +# Object build statements for EXECUTABLE target must_fail_too_many_read_flows + + +############################################# +# Order-only phony target for must_fail_too_many_read_flows + +build cmake_object_order_depends_target_must_fail_too_many_read_flows: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_read_flows.too_many_read_flows tests/dsl/ptg/ptgpp/too_many_read_flows.c tests/dsl/ptg/ptgpp/too_many_read_flows.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir/too_many_read_flows.c.o: C_COMPILER__must_fail_too_many_read_flows_RelWithDebInfo tests/dsl/ptg/ptgpp/too_many_read_flows.c || cmake_object_order_depends_target_must_fail_too_many_read_flows + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir/too_many_read_flows.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target must_fail_too_many_read_flows + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows + +build tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows: C_EXECUTABLE_LINKER__must_fail_too_many_read_flows_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir/too_many_read_flows.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_read_flows.too_many_read_flows parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_read_flows.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows.pdb + + 
+############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/ptgpp/list_install_components: phony + + +############################################# +# Utility command for ptgpp_jdf_forward_RW_NULL.forward_RW_NULL + +build tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL tests/dsl/ptg/ptgpp/forward_RW_NULL.h tests/dsl/ptg/ptgpp/forward_RW_NULL.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_jdf_forward_READ_NULL.forward_READ_NULL + +build tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL: phony tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL tests/dsl/ptg/ptgpp/forward_READ_NULL.h tests/dsl/ptg/ptgpp/forward_READ_NULL.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target must_fail_too_many_write_flows + + +############################################# +# Order-only phony target for must_fail_too_many_write_flows + +build cmake_object_order_depends_target_must_fail_too_many_write_flows: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_write_flows.too_many_write_flows tests/dsl/ptg/ptgpp/too_many_write_flows.c tests/dsl/ptg/ptgpp/too_many_write_flows.h + +build tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir/too_many_write_flows.c.o: C_COMPILER__must_fail_too_many_write_flows_RelWithDebInfo tests/dsl/ptg/ptgpp/too_many_write_flows.c || cmake_object_order_depends_target_must_fail_too_many_write_flows + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir/too_many_write_flows.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 
-Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/ptgpp -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir + OBJECT_FILE_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir/ + TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target must_fail_too_many_write_flows + + +############################################# +# Link the executable tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows + +build tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows: C_EXECUTABLE_LINKER__must_fail_too_many_write_flows_RelWithDebInfo tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir/too_many_write_flows.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_write_flows.too_many_write_flows parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/ptgpp/CMakeFiles/must_fail_too_many_write_flows.dir/ + TARGET_FILE = tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows + 
TARGET_PDB = tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows.pdb + + +############################################# +# Utility command for package + +build tests/dsl/ptg/ptgpp/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/ptgpp/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/package: phony tests/dsl/ptg/ptgpp/CMakeFiles/package.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/ptgpp/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/package_source: phony tests/dsl/ptg/ptgpp/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/ptgpp/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/dsl/ptg/ptgpp/edit_cache: phony tests/dsl/ptg/ptgpp/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/ptgpp/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/dsl/ptg/ptgpp/rebuild_cache: phony tests/dsl/ptg/ptgpp/CMakeFiles/rebuild_cache.util + + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_in_deps.too_many_in_deps + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_in_deps.too_many_in_deps: phony tests/dsl/ptg/ptgpp/too_many_in_deps.h tests/dsl/ptg/ptgpp/too_many_in_deps.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/too_many_in_deps.h + +build tests/dsl/ptg/ptgpp/too_many_in_deps.h tests/dsl/ptg/ptgpp/too_many_in_deps.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/too_many_in_deps.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/too_many_in_deps.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/too_many_in_deps.jdf -C too_many_in_deps.c -H too_many_in_deps.h -f too_many_in_deps + DESC = Generating too_many_in_deps.h, too_many_in_deps.c + restat = 1 + + +############################################# +# Phony custom command for 
tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_read_flows.too_many_read_flows + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_read_flows.too_many_read_flows: phony tests/dsl/ptg/ptgpp/too_many_read_flows.h tests/dsl/ptg/ptgpp/too_many_read_flows.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/too_many_read_flows.h + +build tests/dsl/ptg/ptgpp/too_many_read_flows.h tests/dsl/ptg/ptgpp/too_many_read_flows.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/too_many_read_flows.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/too_many_read_flows.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/too_many_read_flows.jdf -C too_many_read_flows.c -H too_many_read_flows.h -f too_many_read_flows + DESC = Generating too_many_read_flows.h, too_many_read_flows.c + restat = 1 + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/forward_READ_NULL.h + +build tests/dsl/ptg/ptgpp/forward_READ_NULL.h tests/dsl/ptg/ptgpp/forward_READ_NULL.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/forward_READ_NULL.jdf -C forward_READ_NULL.c -H forward_READ_NULL.h -f forward_READ_NULL + DESC = Generating forward_READ_NULL.h, forward_READ_NULL.c + restat = 1 
+ + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_local_vars.too_many_local_vars + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_local_vars.too_many_local_vars: phony tests/dsl/ptg/ptgpp/too_many_local_vars.h tests/dsl/ptg/ptgpp/too_many_local_vars.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/too_many_local_vars.h + +build tests/dsl/ptg/ptgpp/too_many_local_vars.h tests/dsl/ptg/ptgpp/too_many_local_vars.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/too_many_local_vars.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/too_many_local_vars.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/too_many_local_vars.jdf -C too_many_local_vars.c -H too_many_local_vars.h -f too_many_local_vars + DESC = Generating too_many_local_vars.h, too_many_local_vars.c + restat = 1 + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/forward_RW_NULL.h + +build tests/dsl/ptg/ptgpp/forward_RW_NULL.h tests/dsl/ptg/ptgpp/forward_RW_NULL.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/forward_RW_NULL.jdf -C forward_RW_NULL.c -H forward_RW_NULL.h -f forward_RW_NULL + DESC = 
Generating forward_RW_NULL.h, forward_RW_NULL.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_write_check.write_check + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_write_check.write_check: phony tests/dsl/ptg/ptgpp/write_check.h tests/dsl/ptg/ptgpp/write_check.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/write_check.h + +build tests/dsl/ptg/ptgpp/write_check.h tests/dsl/ptg/ptgpp/write_check.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/write_check.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/write_check.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/write_check.jdf -C write_check.c -H write_check.h -f write_check + DESC = Generating write_check.h, write_check.c + restat = 1 + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/too_many_out_deps.h + +build tests/dsl/ptg/ptgpp/too_many_out_deps.h tests/dsl/ptg/ptgpp/too_many_out_deps.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/too_many_out_deps.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/too_many_out_deps.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/too_many_out_deps.jdf -C too_many_out_deps.c -H too_many_out_deps.h -f too_many_out_deps + DESC = Generating too_many_out_deps.h, too_many_out_deps.c + restat = 1 + + 
+############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_out_deps.too_many_out_deps + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_out_deps.too_many_out_deps: phony tests/dsl/ptg/ptgpp/too_many_out_deps.h tests/dsl/ptg/ptgpp/too_many_out_deps.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_write_flows.too_many_write_flows + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_must_fail_too_many_write_flows.too_many_write_flows: phony tests/dsl/ptg/ptgpp/too_many_write_flows.h tests/dsl/ptg/ptgpp/too_many_write_flows.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/ptgpp/too_many_write_flows.h + +build tests/dsl/ptg/ptgpp/too_many_write_flows.h tests/dsl/ptg/ptgpp/too_many_write_flows.c: CUSTOM_COMMAND ../tests/dsl/ptg/ptgpp/too_many_write_flows.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/ptgpp/too_many_write_flows.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/ptgpp/too_many_write_flows.jdf -C too_many_write_flows.c -H too_many_write_flows.h -f too_many_write_flows + DESC = Generating too_many_write_flows.h, too_many_write_flows.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL: phony 
tests/dsl/ptg/ptgpp/forward_RW_NULL.h tests/dsl/ptg/ptgpp/forward_RW_NULL.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL + +build tests/dsl/ptg/ptgpp/CMakeFiles/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL: phony tests/dsl/ptg/ptgpp/forward_READ_NULL.h tests/dsl/ptg/ptgpp/forward_READ_NULL.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/branching/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/branching/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/branching/install/local: phony tests/dsl/ptg/branching/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tests/dsl/ptg/branching/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/branching/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/branching/install: phony tests/dsl/ptg/branching/CMakeFiles/install.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/branching/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/branching/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/branching/install/strip: phony tests/dsl/ptg/branching/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/branching/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/branching/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/branching/rebuild_cache: phony tests/dsl/ptg/branching/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target branching + + +############################################# +# Order-only phony target for branching + +build cmake_object_order_depends_target_branching: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/branching/branching.c tests/dsl/ptg/branching/branching.h tests/dsl/ptg/branching/ptgpp_branching.branching + +build tests/dsl/ptg/branching/CMakeFiles/branching.dir/main.c.o: C_COMPILER__branching_RelWithDebInfo ../tests/dsl/ptg/branching/main.c || cmake_object_order_depends_target_branching + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_wrapper.c.o: C_COMPILER__branching_RelWithDebInfo ../tests/dsl/ptg/branching/branching_wrapper.c || cmake_object_order_depends_target_branching + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_data.c.o: C_COMPILER__branching_RelWithDebInfo ../tests/dsl/ptg/branching/branching_data.c || cmake_object_order_depends_target_branching + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching.c.o: C_COMPILER__branching_RelWithDebInfo tests/dsl/ptg/branching/branching.c || cmake_object_order_depends_target_branching + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target branching + + +############################################# +# Link the executable tests/dsl/ptg/branching/branching + +build tests/dsl/ptg/branching/branching: C_EXECUTABLE_LINKER__branching_RelWithDebInfo tests/dsl/ptg/branching/CMakeFiles/branching.dir/main.c.o tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_wrapper.c.o tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching_data.c.o tests/dsl/ptg/branching/CMakeFiles/branching.dir/branching.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/branching/ptgpp_branching.branching parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching.dir/ + TARGET_FILE = tests/dsl/ptg/branching/branching + TARGET_PDB = tests/dsl/ptg/branching/branching.pdb + + +############################################# +# Utility command for 
ptgpp_branching.branching + +build tests/dsl/ptg/branching/ptgpp_branching.branching: phony tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching.branching tests/dsl/ptg/branching/branching.h tests/dsl/ptg/branching/branching.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_branching_ht.branching_ht + +build tests/dsl/ptg/branching/ptgpp_branching_ht.branching_ht: phony tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_ht.branching_ht tests/dsl/ptg/branching/branching_ht.h tests/dsl/ptg/branching/branching_ht.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target branching_ht + + +############################################# +# Order-only phony target for branching_ht + +build cmake_object_order_depends_target_branching_ht: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/branching_ht.c tests/dsl/ptg/branching/branching_ht.h tests/dsl/ptg/branching/ptgpp_branching_ht.branching_ht + +build tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/main.c.o: C_COMPILER__branching_ht_RelWithDebInfo ../tests/dsl/ptg/branching/main.c || cmake_object_order_depends_target_branching_ht + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_ht.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_wrapper.c.o: C_COMPILER__branching_ht_RelWithDebInfo ../tests/dsl/ptg/branching/branching_wrapper.c || cmake_object_order_depends_target_branching_ht + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_ht.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_data.c.o: C_COMPILER__branching_ht_RelWithDebInfo ../tests/dsl/ptg/branching/branching_data.c || cmake_object_order_depends_target_branching_ht + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_ht.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_ht.c.o: C_COMPILER__branching_ht_RelWithDebInfo tests/dsl/ptg/branching/branching_ht.c || cmake_object_order_depends_target_branching_ht + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_ht.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_ht.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target branching_ht + + +############################################# +# Link the executable tests/dsl/ptg/branching/branching_ht + +build tests/dsl/ptg/branching/branching_ht: C_EXECUTABLE_LINKER__branching_ht_RelWithDebInfo tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/main.c.o tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_wrapper.c.o tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_data.c.o tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/branching_ht.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/ptgpp_branching_ht.branching_ht 
parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_ht.dir/ + TARGET_FILE = tests/dsl/ptg/branching/branching_ht + TARGET_PDB = tests/dsl/ptg/branching/branching_ht.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target branching_idxarr + + +############################################# +# Order-only phony target for branching_idxarr + +build cmake_object_order_depends_target_branching_idxarr: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/branching_idxarr.c tests/dsl/ptg/branching/branching_idxarr.h tests/dsl/ptg/branching/ptgpp_branching_idxarr.branching_idxarr + +build tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/main.c.o: C_COMPILER__branching_idxarr_RelWithDebInfo ../tests/dsl/ptg/branching/main.c || cmake_object_order_depends_target_branching_idxarr + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_idxarr.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_wrapper.c.o: C_COMPILER__branching_idxarr_RelWithDebInfo ../tests/dsl/ptg/branching/branching_wrapper.c || cmake_object_order_depends_target_branching_idxarr + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_idxarr.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_data.c.o: C_COMPILER__branching_idxarr_RelWithDebInfo ../tests/dsl/ptg/branching/branching_data.c || cmake_object_order_depends_target_branching_idxarr + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_idxarr.pdb + +build tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_idxarr.c.o: C_COMPILER__branching_idxarr_RelWithDebInfo tests/dsl/ptg/branching/branching_idxarr.c || cmake_object_order_depends_target_branching_idxarr + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_idxarr.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/branching -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + OBJECT_FILE_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/ + TARGET_PDB = tests/dsl/ptg/branching/branching_idxarr.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target branching_idxarr + + +############################################# +# Link the executable tests/dsl/ptg/branching/branching_idxarr + +build tests/dsl/ptg/branching/branching_idxarr: C_EXECUTABLE_LINKER__branching_idxarr_RelWithDebInfo tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/main.c.o tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_wrapper.c.o tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_data.c.o tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/branching_idxarr.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp 
parsec/libparsec.so tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/ptgpp_branching_idxarr.branching_idxarr parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/branching/CMakeFiles/branching_idxarr.dir/ + TARGET_FILE = tests/dsl/ptg/branching/branching_idxarr + TARGET_PDB = tests/dsl/ptg/branching/branching_idxarr.pdb + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/branching/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/dsl/ptg/branching/edit_cache: phony tests/dsl/ptg/branching/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for ptgpp_branching_idxarr.branching_idxarr + +build tests/dsl/ptg/branching/ptgpp_branching_idxarr.branching_idxarr: phony tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_idxarr.branching_idxarr tests/dsl/ptg/branching/branching_idxarr.h tests/dsl/ptg/branching/branching_idxarr.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/dsl/ptg/branching/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/branching/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/dsl/ptg/branching/package: phony tests/dsl/ptg/branching/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/branching/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/branching/test: phony tests/dsl/ptg/branching/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/branching/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/branching/package_source: phony tests/dsl/ptg/branching/CMakeFiles/package_source.util + + +############################################# +# Custom command for tests/dsl/ptg/branching/branching.h + +build tests/dsl/ptg/branching/branching.h tests/dsl/ptg/branching/branching.c: CUSTOM_COMMAND ../tests/dsl/ptg/branching/branching.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/branching/branching.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/branching/branching.jdf -C branching.c -H branching.h -f branching + DESC = Generating branching.h, branching.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching.branching + +build tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching.branching: phony tests/dsl/ptg/branching/branching.h tests/dsl/ptg/branching/branching.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_ht.branching_ht + +build tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_ht.branching_ht: phony tests/dsl/ptg/branching/branching_ht.h tests/dsl/ptg/branching/branching_ht.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/branching/branching_ht.h + +build tests/dsl/ptg/branching/branching_ht.h tests/dsl/ptg/branching/branching_ht.c: CUSTOM_COMMAND ../tests/dsl/ptg/branching/branching.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp 
../tests/dsl/ptg/branching/branching.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --dep-management dynamic-hash-table -E -i /home/joseph/parsec/tests/dsl/ptg/branching/branching.jdf -C branching_ht.c -H branching_ht.h -f branching + DESC = Generating branching_ht.h, branching_ht.c + restat = 1 + + +############################################# +# Custom command for tests/dsl/ptg/branching/branching_idxarr.h + +build tests/dsl/ptg/branching/branching_idxarr.h tests/dsl/ptg/branching/branching_idxarr.c: CUSTOM_COMMAND ../tests/dsl/ptg/branching/branching.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/branching/branching.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/branching && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --dep-management index-array -E -i /home/joseph/parsec/tests/dsl/ptg/branching/branching.jdf -C branching_idxarr.c -H branching_idxarr.h -f branching + DESC = Generating branching_idxarr.h, branching_idxarr.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_idxarr.branching_idxarr + +build tests/dsl/ptg/branching/CMakeFiles/ptgpp_branching_idxarr.branching_idxarr: phony tests/dsl/ptg/branching/branching_idxarr.h tests/dsl/ptg/branching/branching_idxarr.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# 
============================================================================= + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/choice/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/choice/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/choice/install/local: phony tests/dsl/ptg/choice/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target choice + + +############################################# +# Order-only phony target for choice + +build cmake_object_order_depends_target_choice: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/choice/choice.c tests/dsl/ptg/choice/choice.h tests/dsl/ptg/choice/ptgpp_choice.choice + +build tests/dsl/ptg/choice/CMakeFiles/choice.dir/main.c.o: C_COMPILER__choice_RelWithDebInfo ../tests/dsl/ptg/choice/main.c || cmake_object_order_depends_target_choice + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/choice/CMakeFiles/choice.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/choice -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + OBJECT_FILE_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/choice/CMakeFiles/choice.dir/ + TARGET_PDB = tests/dsl/ptg/choice/choice.pdb + +build tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_wrapper.c.o: C_COMPILER__choice_RelWithDebInfo ../tests/dsl/ptg/choice/choice_wrapper.c || cmake_object_order_depends_target_choice + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/choice -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + OBJECT_FILE_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/choice/CMakeFiles/choice.dir/ + TARGET_PDB = tests/dsl/ptg/choice/choice.pdb + +build tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_data.c.o: C_COMPILER__choice_RelWithDebInfo ../tests/dsl/ptg/choice/choice_data.c || cmake_object_order_depends_target_choice + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/choice -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + OBJECT_FILE_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/choice/CMakeFiles/choice.dir/ + TARGET_PDB = tests/dsl/ptg/choice/choice.pdb + +build tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice.c.o: C_COMPILER__choice_RelWithDebInfo tests/dsl/ptg/choice/choice.c || cmake_object_order_depends_target_choice + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/choice -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + OBJECT_FILE_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/choice/CMakeFiles/choice.dir/ + TARGET_PDB = tests/dsl/ptg/choice/choice.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target choice + + +############################################# +# Link the executable tests/dsl/ptg/choice/choice + +build tests/dsl/ptg/choice/choice: C_EXECUTABLE_LINKER__choice_RelWithDebInfo tests/dsl/ptg/choice/CMakeFiles/choice.dir/main.c.o tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_wrapper.c.o tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice_data.c.o tests/dsl/ptg/choice/CMakeFiles/choice.dir/choice.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/choice/ptgpp_choice.choice parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl 
-L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/choice/CMakeFiles/choice.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/choice/CMakeFiles/choice.dir/ + TARGET_FILE = tests/dsl/ptg/choice/choice + TARGET_PDB = tests/dsl/ptg/choice/choice.pdb + + +############################################# +# Utility command for ptgpp_choice.choice + +build tests/dsl/ptg/choice/ptgpp_choice.choice: phony tests/dsl/ptg/choice/CMakeFiles/ptgpp_choice.choice tests/dsl/ptg/choice/choice.h tests/dsl/ptg/choice/choice.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/choice/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/ptg/choice/edit_cache: phony tests/dsl/ptg/choice/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package + +build tests/dsl/ptg/choice/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/choice/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/choice/package: phony tests/dsl/ptg/choice/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/choice/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/choice/test: phony tests/dsl/ptg/choice/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/choice/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/ptg/choice/package_source: phony tests/dsl/ptg/choice/CMakeFiles/package_source.util + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/choice/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/choice/rebuild_cache: phony tests/dsl/ptg/choice/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/choice/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/choice/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/choice/install/strip: phony tests/dsl/ptg/choice/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/choice/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/dsl/ptg/choice/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/choice/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/choice/install: phony tests/dsl/ptg/choice/CMakeFiles/install.util + + +############################################# +# Custom command for tests/dsl/ptg/choice/choice.h + +build tests/dsl/ptg/choice/choice.h tests/dsl/ptg/choice/choice.c: CUSTOM_COMMAND ../tests/dsl/ptg/choice/choice.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/choice/choice.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/choice && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/choice/choice.jdf -C choice.c -H choice.h -f choice + DESC = Generating choice.h, choice.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/choice/CMakeFiles/ptgpp_choice.choice + +build tests/dsl/ptg/choice/CMakeFiles/ptgpp_choice.choice: phony tests/dsl/ptg/choice/choice.h tests/dsl/ptg/choice/choice.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/controlgather/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/rebuild_cache: phony tests/dsl/ptg/controlgather/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for ptgpp_ctlgat.ctlgat + +build tests/dsl/ptg/controlgather/ptgpp_ctlgat.ctlgat: phony tests/dsl/ptg/controlgather/CMakeFiles/ptgpp_ctlgat.ctlgat tests/dsl/ptg/controlgather/ctlgat.h tests/dsl/ptg/controlgather/ctlgat.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/dsl/ptg/controlgather/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/controlgather/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/package: phony tests/dsl/ptg/controlgather/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/controlgather/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/test: phony tests/dsl/ptg/controlgather/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/controlgather/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/package_source: phony tests/dsl/ptg/controlgather/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target ctlgat + + +############################################# +# Order-only phony target for ctlgat + +build cmake_object_order_depends_target_ctlgat: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/controlgather/ctlgat.c tests/dsl/ptg/controlgather/ctlgat.h tests/dsl/ptg/controlgather/ptgpp_ctlgat.ctlgat + +build tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/main.c.o: C_COMPILER__ctlgat_RelWithDebInfo ../tests/dsl/ptg/controlgather/main.c || cmake_object_order_depends_target_ctlgat + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/controlgather -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + OBJECT_FILE_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ + TARGET_PDB = tests/dsl/ptg/controlgather/ctlgat.pdb + +build tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_wrapper.c.o: C_COMPILER__ctlgat_RelWithDebInfo ../tests/dsl/ptg/controlgather/ctlgat_wrapper.c || cmake_object_order_depends_target_ctlgat + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/controlgather -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + OBJECT_FILE_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ + TARGET_PDB = tests/dsl/ptg/controlgather/ctlgat.pdb + +build tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_data.c.o: C_COMPILER__ctlgat_RelWithDebInfo ../tests/dsl/ptg/controlgather/ctlgat_data.c || cmake_object_order_depends_target_ctlgat + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/dsl/ptg/controlgather -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + OBJECT_FILE_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ + TARGET_PDB = tests/dsl/ptg/controlgather/ctlgat.pdb + +build tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat.c.o: C_COMPILER__ctlgat_RelWithDebInfo tests/dsl/ptg/controlgather/ctlgat.c || cmake_object_order_depends_target_ctlgat + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/controlgather -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + OBJECT_FILE_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ + TARGET_PDB = tests/dsl/ptg/controlgather/ctlgat.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ctlgat + + +############################################# +# Link the executable tests/dsl/ptg/controlgather/ctlgat + +build tests/dsl/ptg/controlgather/ctlgat: C_EXECUTABLE_LINKER__ctlgat_RelWithDebInfo tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/main.c.o tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_wrapper.c.o tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat_data.c.o tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ctlgat.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/controlgather/ptgpp_ctlgat.ctlgat parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/controlgather/CMakeFiles/ctlgat.dir/ + TARGET_FILE = tests/dsl/ptg/controlgather/ctlgat + TARGET_PDB = tests/dsl/ptg/controlgather/ctlgat.pdb + + +############################################# +# Utility command for edit_cache + +build 
tests/dsl/ptg/controlgather/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/ptg/controlgather/edit_cache: phony tests/dsl/ptg/controlgather/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/controlgather/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/controlgather/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/install/strip: phony tests/dsl/ptg/controlgather/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/controlgather/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/dsl/ptg/controlgather/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/controlgather/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/install: phony tests/dsl/ptg/controlgather/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/controlgather/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/controlgather/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/controlgather/install/local: phony tests/dsl/ptg/controlgather/CMakeFiles/install/local.util + + +############################################# +# Phony custom command for tests/dsl/ptg/controlgather/CMakeFiles/ptgpp_ctlgat.ctlgat + +build tests/dsl/ptg/controlgather/CMakeFiles/ptgpp_ctlgat.ctlgat: phony tests/dsl/ptg/controlgather/ctlgat.h tests/dsl/ptg/controlgather/ctlgat.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/dsl/ptg/controlgather/ctlgat.h + +build tests/dsl/ptg/controlgather/ctlgat.h tests/dsl/ptg/controlgather/ctlgat.c: CUSTOM_COMMAND ../tests/dsl/ptg/controlgather/ctlgat.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/controlgather/ctlgat.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/controlgather/ctlgat.jdf -C ctlgat.c -H ctlgat.h -f ctlgat + DESC = Generating ctlgat.h, ctlgat.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# 
/home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build statements for EXECUTABLE target udf + + +############################################# +# Order-only phony target for udf + +build cmake_object_order_depends_target_udf: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/user-defined-functions/ptgpp_udf.udf tests/dsl/ptg/user-defined-functions/udf.c tests/dsl/ptg/user-defined-functions/udf.h + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/main.c.o: C_COMPILER__udf_RelWithDebInfo ../tests/dsl/ptg/user-defined-functions/main.c || cmake_object_order_depends_target_udf + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/dsl/ptg/user-defined-functions -Itests/dsl/ptg/user-defined-functions -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + OBJECT_FILE_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/ + TARGET_PDB = tests/dsl/ptg/user-defined-functions/udf.pdb + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf_wrapper.c.o: C_COMPILER__udf_RelWithDebInfo ../tests/dsl/ptg/user-defined-functions/udf_wrapper.c || cmake_object_order_depends_target_udf + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/dsl/ptg/user-defined-functions -Itests/dsl/ptg/user-defined-functions -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + OBJECT_FILE_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/ + TARGET_PDB = tests/dsl/ptg/user-defined-functions/udf.pdb + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf.c.o: C_COMPILER__udf_RelWithDebInfo tests/dsl/ptg/user-defined-functions/udf.c || cmake_object_order_depends_target_udf + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/dsl/ptg/user-defined-functions -Itests/dsl/ptg/user-defined-functions -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + OBJECT_FILE_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/ + TARGET_PDB = tests/dsl/ptg/user-defined-functions/udf.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target udf + + +############################################# +# Link the executable tests/dsl/ptg/user-defined-functions/udf + +build tests/dsl/ptg/user-defined-functions/udf: C_EXECUTABLE_LINKER__udf_RelWithDebInfo tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/main.c.o tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf_wrapper.c.o tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/udf.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/user-defined-functions/ptgpp_udf.udf 
parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/user-defined-functions/CMakeFiles/udf.dir/ + TARGET_FILE = tests/dsl/ptg/user-defined-functions/udf + TARGET_PDB = tests/dsl/ptg/user-defined-functions/udf.pdb + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/package_source: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/package_source.util + + +############################################# +# Utility command for package + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/user-defined-functions/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/package: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/test: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/ptg/user-defined-functions/edit_cache: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/rebuild_cache: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/user-defined-functions/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/install/strip: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/user-defined-functions/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/user-defined-functions/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/install: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/install.util + + +############################################# +# Utility command for ptgpp_udf.udf + +build tests/dsl/ptg/user-defined-functions/ptgpp_udf.udf: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/ptgpp_udf.udf tests/dsl/ptg/user-defined-functions/udf.h tests/dsl/ptg/user-defined-functions/udf.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/user-defined-functions/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/ptg/user-defined-functions/install/local: phony tests/dsl/ptg/user-defined-functions/CMakeFiles/install/local.util + + +############################################# +# Custom command for tests/dsl/ptg/user-defined-functions/udf.h + +build tests/dsl/ptg/user-defined-functions/udf.h tests/dsl/ptg/user-defined-functions/udf.c: CUSTOM_COMMAND ../tests/dsl/ptg/user-defined-functions/udf.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/user-defined-functions/udf.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/user-defined-functions/udf.jdf -C udf.c -H udf.h -f udf + DESC = Generating udf.h, udf.c + restat = 1 + + +############################################# +# Phony custom command for 
tests/dsl/ptg/user-defined-functions/CMakeFiles/ptgpp_udf.udf + +build tests/dsl/ptg/user-defined-functions/CMakeFiles/ptgpp_udf.udf: phony tests/dsl/ptg/user-defined-functions/udf.h tests/dsl/ptg/user-defined-functions/udf.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/cuda/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/cuda/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/ptg/cuda/install/strip: phony tests/dsl/ptg/cuda/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/cuda/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/cuda/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/cuda/install/local: phony tests/dsl/ptg/cuda/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tests/dsl/ptg/cuda/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/cuda/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/dsl/ptg/cuda/install: phony tests/dsl/ptg/cuda/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/cuda/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/cuda/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/dsl/ptg/cuda/rebuild_cache: phony tests/dsl/ptg/cuda/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/cuda/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/dsl/ptg/cuda/edit_cache: phony tests/dsl/ptg/cuda/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/cuda/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/ptg/cuda/package_source: phony tests/dsl/ptg/cuda/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/cuda/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/cuda && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/cuda/test: phony tests/dsl/ptg/cuda/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build tests/dsl/ptg/cuda/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/cuda/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/cuda/package: phony tests/dsl/ptg/cuda/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/dsl/ptg/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build statements for EXECUTABLE target local_indices + + +############################################# +# Order-only phony target for local_indices + +build cmake_object_order_depends_target_local_indices: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/dsl/ptg/local-indices/local_indices.c tests/dsl/ptg/local-indices/local_indices.h tests/dsl/ptg/local-indices/ptgpp_local_indices.local_indices + +build tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir/local_indices.c.o: C_COMPILER__local_indices_RelWithDebInfo tests/dsl/ptg/local-indices/local_indices.c || cmake_object_order_depends_target_local_indices + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir/local_indices.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/dsl/ptg/local-indices -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir + OBJECT_FILE_DIR = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir + TARGET_COMPILE_PDB = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir/ + TARGET_PDB = tests/dsl/ptg/local-indices/local_indices.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target local_indices + + +############################################# +# Link the executable tests/dsl/ptg/local-indices/local_indices + +build tests/dsl/ptg/local-indices/local_indices: C_EXECUTABLE_LINKER__local_indices_RelWithDebInfo tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir/local_indices.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/dsl/ptg/local-indices/ptgpp_local_indices.local_indices parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/ptg/local-indices/CMakeFiles/local_indices.dir/ + TARGET_FILE = tests/dsl/ptg/local-indices/local_indices + TARGET_PDB = tests/dsl/ptg/local-indices/local_indices.pdb + + +############################################# +# Utility command for ptgpp_local_indices.local_indices + +build 
tests/dsl/ptg/local-indices/ptgpp_local_indices.local_indices: phony tests/dsl/ptg/local-indices/CMakeFiles/ptgpp_local_indices.local_indices tests/dsl/ptg/local-indices/local_indices.h tests/dsl/ptg/local-indices/local_indices.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/dsl/ptg/local-indices/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/ptg/local-indices/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/package: phony tests/dsl/ptg/local-indices/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/dsl/ptg/local-indices/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/test: phony tests/dsl/ptg/local-indices/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/dsl/ptg/local-indices/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/package_source: phony tests/dsl/ptg/local-indices/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/dsl/ptg/local-indices/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/ptg/local-indices/edit_cache: phony tests/dsl/ptg/local-indices/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/dsl/ptg/local-indices/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/rebuild_cache: phony tests/dsl/ptg/local-indices/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/dsl/ptg/local-indices/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/ptg/local-indices/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/install/strip: phony tests/dsl/ptg/local-indices/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/ptg/local-indices/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/dsl/ptg/local-indices/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/ptg/local-indices/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/install: phony tests/dsl/ptg/local-indices/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/dsl/ptg/local-indices/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/ptg/local-indices/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/dsl/ptg/local-indices/install/local: phony tests/dsl/ptg/local-indices/CMakeFiles/install/local.util + + +############################################# +# Custom command for tests/dsl/ptg/local-indices/local_indices.h + +build tests/dsl/ptg/local-indices/local_indices.h tests/dsl/ptg/local-indices/local_indices.c: CUSTOM_COMMAND ../tests/dsl/ptg/local-indices/local_indices.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/dsl/ptg/local-indices/local_indices.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/dsl/ptg/local-indices/local_indices.jdf -C local_indices.c -H local_indices.h -f local_indices + DESC = Generating local_indices.h, local_indices.c + restat = 1 + + +############################################# +# Phony custom command for tests/dsl/ptg/local-indices/CMakeFiles/ptgpp_local_indices.local_indices + +build tests/dsl/ptg/local-indices/CMakeFiles/ptgpp_local_indices.local_indices: phony tests/dsl/ptg/local-indices/local_indices.h tests/dsl/ptg/local-indices/local_indices.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build statements for EXECUTABLE target reduce + + +############################################# +# Order-only phony target for reduce + +build cmake_object_order_depends_target_reduce: phony || cmake_object_order_depends_target_parsec + +build 
tests/collections/CMakeFiles/reduce.dir/reduce.c.o: C_COMPILER__reduce_RelWithDebInfo ../tests/collections/reduce.c || cmake_object_order_depends_target_reduce + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/CMakeFiles/reduce.dir/reduce.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/CMakeFiles/reduce.dir + OBJECT_FILE_DIR = tests/collections/CMakeFiles/reduce.dir + TARGET_COMPILE_PDB = tests/collections/CMakeFiles/reduce.dir/ + TARGET_PDB = tests/collections/reduce.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target reduce + + +############################################# +# Link the executable tests/collections/reduce + +build tests/collections/reduce: C_EXECUTABLE_LINKER__reduce_RelWithDebInfo tests/collections/CMakeFiles/reduce.dir/reduce.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/CMakeFiles/reduce.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/CMakeFiles/reduce.dir/ + TARGET_FILE = tests/collections/reduce + TARGET_PDB = tests/collections/reduce.pdb + + +############################################# +# Utility command for ptgpp_kcyclic.kcyclic + +build tests/collections/ptgpp_kcyclic.kcyclic: 
phony tests/collections/CMakeFiles/ptgpp_kcyclic.kcyclic tests/collections/kcyclic.h tests/collections/kcyclic.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build tests/collections/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/collections/package_source: phony tests/collections/CMakeFiles/package_source.util + + +############################################# +# Utility command for install/local + +build tests/collections/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/collections/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/collections/install/local: phony tests/collections/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target kcyclic + + +############################################# +# Order-only phony target for kcyclic + +build cmake_object_order_depends_target_kcyclic: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/kcyclic.c tests/collections/kcyclic.h tests/collections/ptgpp_kcyclic.kcyclic + +build tests/collections/CMakeFiles/kcyclic.dir/kcyclic.c.o: C_COMPILER__kcyclic_RelWithDebInfo tests/collections/kcyclic.c || cmake_object_order_depends_target_kcyclic + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/CMakeFiles/kcyclic.dir/kcyclic.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections -Iparsec/include/fortran + OBJECT_DIR = tests/collections/CMakeFiles/kcyclic.dir + OBJECT_FILE_DIR = tests/collections/CMakeFiles/kcyclic.dir + TARGET_COMPILE_PDB = tests/collections/CMakeFiles/kcyclic.dir/ + TARGET_PDB = tests/collections/kcyclic.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target kcyclic + + +############################################# +# Link the executable tests/collections/kcyclic + +build tests/collections/kcyclic: C_EXECUTABLE_LINKER__kcyclic_RelWithDebInfo tests/collections/CMakeFiles/kcyclic.dir/kcyclic.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/ptgpp_kcyclic.kcyclic parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = 
-Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/CMakeFiles/kcyclic.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/CMakeFiles/kcyclic.dir/ + TARGET_FILE = tests/collections/kcyclic + TARGET_PDB = tests/collections/kcyclic.pdb + + +############################################# +# Utility command for package + +build tests/collections/CMakeFiles/package.util: CUSTOM_COMMAND tests/collections/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/collections/package: phony tests/collections/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/collections/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/collections/test: phony tests/collections/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build tests/collections/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/collections/edit_cache: phony tests/collections/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/collections/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/collections/rebuild_cache: phony tests/collections/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/collections/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/collections/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/collections/install/strip: phony tests/collections/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/collections/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/collections/CMakeFiles/install.util: CUSTOM_COMMAND tests/collections/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/collections/install: phony tests/collections/CMakeFiles/install.util + + +############################################# +# Phony custom command for tests/collections/CMakeFiles/ptgpp_kcyclic.kcyclic + +build tests/collections/CMakeFiles/ptgpp_kcyclic.kcyclic: phony tests/collections/kcyclic.h tests/collections/kcyclic.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/kcyclic.h + +build tests/collections/kcyclic.h tests/collections/kcyclic.c: CUSTOM_COMMAND ../tests/collections/kcyclic.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/kcyclic.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/kcyclic.jdf -C kcyclic.c -H kcyclic.h -f kcyclic + DESC = Generating kcyclic.h, kcyclic.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/collections/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/collections/two_dim_band/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/collections/two_dim_band/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/collections/two_dim_band/install/local: phony tests/collections/two_dim_band/CMakeFiles/install/local.util + + +############################################# +# Utility command for rebuild_cache + +build tests/collections/two_dim_band/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/collections/two_dim_band/rebuild_cache: phony tests/collections/two_dim_band/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for ptgpp_testing_band.two_dim_band + +build tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band: phony tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band tests/collections/two_dim_band/two_dim_band.h tests/collections/two_dim_band/two_dim_band.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_testing_band.two_dim_band_free + +build tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band_free: phony tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band_free tests/collections/two_dim_band/two_dim_band_free.h tests/collections/two_dim_band/two_dim_band_free.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/collections/two_dim_band/CMakeFiles/package.util: CUSTOM_COMMAND tests/collections/two_dim_band/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/collections/two_dim_band/package: phony tests/collections/two_dim_band/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/collections/two_dim_band/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/collections/two_dim_band/test: phony tests/collections/two_dim_band/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/collections/two_dim_band/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/collections/two_dim_band/package_source: phony tests/collections/two_dim_band/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target testing_band + + +############################################# +# Order-only phony target for testing_band + +build cmake_object_order_depends_target_testing_band: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band_free tests/collections/two_dim_band/two_dim_band.c tests/collections/two_dim_band/two_dim_band.h tests/collections/two_dim_band/two_dim_band_free.c tests/collections/two_dim_band/two_dim_band_free.h + +build tests/collections/two_dim_band/CMakeFiles/testing_band.dir/main.c.o: C_COMPILER__testing_band_RelWithDebInfo ../tests/collections/two_dim_band/main.c || cmake_object_order_depends_target_testing_band + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/collections/two_dim_band -Itests/collections/two_dim_band -Iparsec/include/fortran + OBJECT_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + OBJECT_FILE_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + TARGET_COMPILE_PDB = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/ + TARGET_PDB = tests/collections/two_dim_band/testing_band.pdb + +build tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band.c.o: C_COMPILER__testing_band_RelWithDebInfo tests/collections/two_dim_band/two_dim_band.c || cmake_object_order_depends_target_testing_band + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/collections/two_dim_band -Itests/collections/two_dim_band -Iparsec/include/fortran + OBJECT_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + OBJECT_FILE_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + TARGET_COMPILE_PDB = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/ + TARGET_PDB = tests/collections/two_dim_band/testing_band.pdb + +build tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band_free.c.o: C_COMPILER__testing_band_RelWithDebInfo tests/collections/two_dim_band/two_dim_band_free.c || cmake_object_order_depends_target_testing_band + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band_free.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/collections/two_dim_band -Itests/collections/two_dim_band -Iparsec/include/fortran + OBJECT_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + OBJECT_FILE_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + TARGET_COMPILE_PDB = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/ + TARGET_PDB = tests/collections/two_dim_band/testing_band.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target testing_band + + +############################################# +# Link the executable tests/collections/two_dim_band/testing_band + +build tests/collections/two_dim_band/testing_band: C_EXECUTABLE_LINKER__testing_band_RelWithDebInfo tests/collections/two_dim_band/CMakeFiles/testing_band.dir/main.c.o tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band.c.o tests/collections/two_dim_band/CMakeFiles/testing_band.dir/two_dim_band_free.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band_free parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/two_dim_band/CMakeFiles/testing_band.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/two_dim_band/CMakeFiles/testing_band.dir/ + TARGET_FILE = 
tests/collections/two_dim_band/testing_band + TARGET_PDB = tests/collections/two_dim_band/testing_band.pdb + + +############################################# +# Utility command for edit_cache + +build tests/collections/two_dim_band/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/collections/two_dim_band/edit_cache: phony tests/collections/two_dim_band/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/collections/two_dim_band/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/collections/two_dim_band/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/collections/two_dim_band/install/strip: phony tests/collections/two_dim_band/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/collections/two_dim_band/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/collections/two_dim_band/CMakeFiles/install.util: CUSTOM_COMMAND tests/collections/two_dim_band/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/collections/two_dim_band/install: phony tests/collections/two_dim_band/CMakeFiles/install.util + + +############################################# +# Phony custom command for tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band + +build tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band: phony tests/collections/two_dim_band/two_dim_band.h tests/collections/two_dim_band/two_dim_band.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/two_dim_band/two_dim_band.h + +build tests/collections/two_dim_band/two_dim_band.h tests/collections/two_dim_band/two_dim_band.c: CUSTOM_COMMAND ../tests/collections/two_dim_band/two_dim_band.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/two_dim_band/two_dim_band.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/two_dim_band/two_dim_band.jdf -C two_dim_band.c -H two_dim_band.h -f two_dim_band + DESC = Generating two_dim_band.h, two_dim_band.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band_free + +build tests/collections/two_dim_band/CMakeFiles/ptgpp_testing_band.two_dim_band_free: phony tests/collections/two_dim_band/two_dim_band_free.h tests/collections/two_dim_band/two_dim_band_free.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/two_dim_band/two_dim_band_free.h + +build 
tests/collections/two_dim_band/two_dim_band_free.h tests/collections/two_dim_band/two_dim_band_free.c: CUSTOM_COMMAND ../tests/collections/two_dim_band/two_dim_band_free.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/two_dim_band/two_dim_band_free.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/two_dim_band && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/two_dim_band/two_dim_band_free.jdf -C two_dim_band_free.c -H two_dim_band_free.h -f two_dim_band_free + DESC = Generating two_dim_band_free.h, two_dim_band_free.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/collections/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install + +build tests/collections/redistribute/CMakeFiles/install.util: CUSTOM_COMMAND tests/collections/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/collections/redistribute/install: phony tests/collections/redistribute/CMakeFiles/install.util + + +############################################# +# Utility command for rebuild_cache + +build tests/collections/redistribute/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/collections/redistribute/rebuild_cache: phony tests/collections/redistribute/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tests/collections/redistribute/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/collections/redistribute/edit_cache: phony tests/collections/redistribute/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for test + +build tests/collections/redistribute/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build tests/collections/redistribute/test: phony tests/collections/redistribute/CMakeFiles/test.util + +# ============================================================================= +# Object build statements for EXECUTABLE target testing_redistribute + + +############################################# +# Order-only phony target for testing_redistribute + +build cmake_object_order_depends_target_testing_redistribute: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_bound tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check2 tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_no_optimization tests/collections/redistribute/redistribute_bound.c tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_check.c tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check2.c tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_no_optimization.c tests/collections/redistribute/redistribute_no_optimization.h + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/testing_redistribute.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo ../tests/collections/redistribute/testing_redistribute.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/testing_redistribute.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/common.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo ../tests/collections/redistribute/common.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo tests/collections/redistribute/redistribute_check.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check2.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo tests/collections/redistribute/redistribute_check2.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check2.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_bound.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo tests/collections/redistribute/redistribute_bound.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_bound.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_no_optimization.c.o: C_COMPILER__testing_redistribute_RelWithDebInfo tests/collections/redistribute/redistribute_no_optimization.c || cmake_object_order_depends_target_testing_redistribute + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_no_optimization.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target testing_redistribute + + +############################################# +# Link the executable tests/collections/redistribute/testing_redistribute + +build tests/collections/redistribute/testing_redistribute: C_EXECUTABLE_LINKER__testing_redistribute_RelWithDebInfo tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/testing_redistribute.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/common.c.o 
tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_check2.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_bound.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/redistribute_no_optimization.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_bound tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check2 tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_no_optimization parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute.dir/ + TARGET_FILE = tests/collections/redistribute/testing_redistribute + TARGET_PDB = tests/collections/redistribute/testing_redistribute.pdb + + +############################################# +# Utility command for ptgpp_testing_redistribute_random.redistribute_check + +build tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check: phony 
tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_testing_redistribute.redistribute_check + +build tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for install/local + +build tests/collections/redistribute/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/collections/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/collections/redistribute/install/local: phony tests/collections/redistribute/CMakeFiles/install/local.util + + +############################################# +# Utility command for ptgpp_testing_redistribute_random.redistribute_no_optimization + +build tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_no_optimization: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_no_optimization tests/collections/redistribute/redistribute_no_optimization.h tests/collections/redistribute/redistribute_no_optimization.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for install/strip + +build tests/collections/redistribute/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/collections/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/collections/redistribute/install/strip: phony tests/collections/redistribute/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/collections/redistribute/list_install_components: phony + + +############################################# +# Utility command for ptgpp_testing_redistribute.redistribute_check2 + +build tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check2: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check2 tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_check2.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_testing_redistribute_random.redistribute_bound + +build tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_bound: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_bound tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_bound.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build tests/collections/redistribute/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/collections/redistribute/package_source: phony tests/collections/redistribute/CMakeFiles/package_source.util + + +############################################# +# Utility command for ptgpp_testing_redistribute.redistribute_bound + +build tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_bound: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_bound tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_bound.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/collections/redistribute/CMakeFiles/package.util: CUSTOM_COMMAND tests/collections/redistribute/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/collections/redistribute/package: phony tests/collections/redistribute/CMakeFiles/package.util + + +############################################# +# Utility command for ptgpp_testing_redistribute.redistribute_no_optimization + +build tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_no_optimization: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_no_optimization tests/collections/redistribute/redistribute_no_optimization.h tests/collections/redistribute/redistribute_no_optimization.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target testing_redistribute_random + + +############################################# +# Order-only phony target for testing_redistribute_random + +build cmake_object_order_depends_target_testing_redistribute_random: phony || 
cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_bound tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check2 tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_no_optimization tests/collections/redistribute/redistribute_bound.c tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_check.c tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check2.c tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_no_optimization.c tests/collections/redistribute/redistribute_no_optimization.h + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/testing_redistribute_random.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo ../tests/collections/redistribute/testing_redistribute_random.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/testing_redistribute_random.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/common.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo ../tests/collections/redistribute/common.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo tests/collections/redistribute/redistribute_check.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check2.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo tests/collections/redistribute/redistribute_check2.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check2.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_bound.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo tests/collections/redistribute/redistribute_bound.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_bound.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + +build tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_no_optimization.c.o: C_COMPILER__testing_redistribute_random_RelWithDebInfo tests/collections/redistribute/redistribute_no_optimization.c || cmake_object_order_depends_target_testing_redistribute_random + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_no_optimization.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Itests/collections/redistribute -I../tests/collections/redistribute -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + OBJECT_FILE_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target testing_redistribute_random + + +############################################# +# Link the executable tests/collections/redistribute/testing_redistribute_random + +build tests/collections/redistribute/testing_redistribute_random: C_EXECUTABLE_LINKER__testing_redistribute_random_RelWithDebInfo tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/testing_redistribute_random.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/common.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_check2.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_bound.c.o tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/redistribute_no_optimization.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_bound tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check2 tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_no_optimization parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath 
-Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/redistribute/CMakeFiles/testing_redistribute_random.dir/ + TARGET_FILE = tests/collections/redistribute/testing_redistribute_random + TARGET_PDB = tests/collections/redistribute/testing_redistribute_random.pdb + + +############################################# +# Utility command for ptgpp_testing_redistribute_random.redistribute_check2 + +build tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check2: phony tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check2 tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_check2.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Custom command for tests/collections/redistribute/redistribute_check.h + +build tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check.c: CUSTOM_COMMAND ../tests/collections/redistribute/redistribute_check.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/redistribute/redistribute_check.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i 
/home/joseph/parsec/tests/collections/redistribute/redistribute_check.jdf -C redistribute_check.c -H redistribute_check.h -f redistribute_check + DESC = Generating redistribute_check.h, redistribute_check.c + restat = 1 + + +############################################# +# Custom command for tests/collections/redistribute/redistribute_check2.h + +build tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_check2.c: CUSTOM_COMMAND ../tests/collections/redistribute/redistribute_check2.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/redistribute/redistribute_check2.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/redistribute/redistribute_check2.jdf -C redistribute_check2.c -H redistribute_check2.h -f redistribute_check2 + DESC = Generating redistribute_check2.h, redistribute_check2.c + restat = 1 + + +############################################# +# Custom command for tests/collections/redistribute/redistribute_bound.h + +build tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_bound.c: CUSTOM_COMMAND ../tests/collections/redistribute/redistribute_bound.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/redistribute/redistribute_bound.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/redistribute/redistribute_bound.jdf -C redistribute_bound.c -H redistribute_bound.h -f redistribute_bound + DESC = Generating redistribute_bound.h, redistribute_bound.c 
+ restat = 1 + + +############################################# +# Custom command for tests/collections/redistribute/redistribute_no_optimization.h + +build tests/collections/redistribute/redistribute_no_optimization.h tests/collections/redistribute/redistribute_no_optimization.c: CUSTOM_COMMAND ../tests/collections/redistribute/redistribute_no_optimization.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/redistribute/redistribute_no_optimization.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/redistribute && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/redistribute/redistribute_no_optimization.jdf -C redistribute_no_optimization.c -H redistribute_no_optimization.h -f redistribute_no_optimization + DESC = Generating redistribute_no_optimization.h, redistribute_no_optimization.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check: phony tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check: phony tests/collections/redistribute/redistribute_check.h tests/collections/redistribute/redistribute_check.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + 
+############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_no_optimization + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_no_optimization: phony tests/collections/redistribute/redistribute_no_optimization.h tests/collections/redistribute/redistribute_no_optimization.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check2 + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_check2: phony tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_check2.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_bound + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_bound: phony tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_bound.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_bound + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_bound: phony tests/collections/redistribute/redistribute_bound.h tests/collections/redistribute/redistribute_bound.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + 
+############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_no_optimization + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute.redistribute_no_optimization: phony tests/collections/redistribute/redistribute_no_optimization.h tests/collections/redistribute/redistribute_no_optimization.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check2 + +build tests/collections/redistribute/CMakeFiles/ptgpp_testing_redistribute_random.redistribute_check2: phony tests/collections/redistribute/redistribute_check2.h tests/collections/redistribute/redistribute_check2.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/collections/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/collections/reshape/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/collections/reshape/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/collections/reshape/install/local: phony tests/collections/reshape/CMakeFiles/install/local.util + + +############################################# +# Utility command for ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape + +build tests/collections/reshape/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape tests/collections/reshape/input_dep_single_copy_reshape.h tests/collections/reshape/input_dep_single_copy_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.remote_no_re_reshape + +build tests/collections/reshape/ptgpp_reshape.remote_no_re_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_no_re_reshape tests/collections/reshape/remote_no_re_reshape.h tests/collections/reshape/remote_no_re_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for rebuild_cache + +build tests/collections/reshape/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/collections/reshape/rebuild_cache: phony tests/collections/reshape/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target avoidable_reshape + + +############################################# +# Order-only phony target for avoidable_reshape + +build cmake_object_order_depends_target_avoidable_reshape: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/reshape/avoidable_reshape.c tests/collections/reshape/avoidable_reshape.h tests/collections/reshape/ptgpp_avoidable_reshape.avoidable_reshape + +build tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/testing_avoidable_reshape.c.o: C_COMPILER__avoidable_reshape_RelWithDebInfo ../tests/collections/reshape/testing_avoidable_reshape.c || cmake_object_order_depends_target_avoidable_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/testing_avoidable_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/ + TARGET_PDB = tests/collections/reshape/avoidable_reshape.pdb + +build tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/common.c.o: C_COMPILER__avoidable_reshape_RelWithDebInfo ../tests/collections/reshape/common.c || cmake_object_order_depends_target_avoidable_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/ + TARGET_PDB = tests/collections/reshape/avoidable_reshape.pdb + +build tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/avoidable_reshape.c.o: C_COMPILER__avoidable_reshape_RelWithDebInfo tests/collections/reshape/avoidable_reshape.c || cmake_object_order_depends_target_avoidable_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/avoidable_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/ + TARGET_PDB = tests/collections/reshape/avoidable_reshape.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target avoidable_reshape + + +############################################# +# Link the executable tests/collections/reshape/avoidable_reshape + +build tests/collections/reshape/avoidable_reshape: C_EXECUTABLE_LINKER__avoidable_reshape_RelWithDebInfo tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/testing_avoidable_reshape.c.o tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/common.c.o tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/avoidable_reshape.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/reshape/ptgpp_avoidable_reshape.avoidable_reshape parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/avoidable_reshape.dir/ + TARGET_FILE = tests/collections/reshape/avoidable_reshape + TARGET_PDB = 
tests/collections/reshape/avoidable_reshape.pdb + + +############################################# +# Utility command for install + +build tests/collections/reshape/CMakeFiles/install.util: CUSTOM_COMMAND tests/collections/reshape/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/collections/reshape/install: phony tests/collections/reshape/CMakeFiles/install.util + + +############################################# +# Utility command for ptgpp_reshape.local_input_reshape + +build tests/collections/reshape/ptgpp_reshape.local_input_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_reshape tests/collections/reshape/local_input_reshape.h tests/collections/reshape/local_input_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.local_output_reshape + +build tests/collections/reshape/ptgpp_reshape.local_output_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_output_reshape tests/collections/reshape/local_output_reshape.h tests/collections/reshape/local_output_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.remote_read_reshape + +build tests/collections/reshape/ptgpp_reshape.remote_read_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_read_reshape tests/collections/reshape/remote_read_reshape.h tests/collections/reshape/remote_read_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.local_read_reshape + +build tests/collections/reshape/ptgpp_reshape.local_read_reshape: phony 
tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_read_reshape tests/collections/reshape/local_read_reshape.h tests/collections/reshape/local_read_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.local_no_reshape + +build tests/collections/reshape/ptgpp_reshape.local_no_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_no_reshape tests/collections/reshape/local_no_reshape.h tests/collections/reshape/local_no_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/collections/reshape/CMakeFiles/package.util: CUSTOM_COMMAND tests/collections/reshape/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/collections/reshape/package: phony tests/collections/reshape/CMakeFiles/package.util + +# ============================================================================= +# Object build statements for EXECUTABLE target reshape + + +############################################# +# Order-only phony target for reshape + +build cmake_object_order_depends_target_reshape: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/reshape/local_input_LU_LL.c tests/collections/reshape/local_input_LU_LL.h tests/collections/reshape/local_input_reshape.c tests/collections/reshape/local_input_reshape.h tests/collections/reshape/local_no_reshape.c tests/collections/reshape/local_no_reshape.h tests/collections/reshape/local_output_reshape.c tests/collections/reshape/local_output_reshape.h tests/collections/reshape/local_read_reshape.c tests/collections/reshape/local_read_reshape.h 
tests/collections/reshape/ptgpp_reshape.local_input_LU_LL tests/collections/reshape/ptgpp_reshape.local_input_reshape tests/collections/reshape/ptgpp_reshape.local_no_reshape tests/collections/reshape/ptgpp_reshape.local_output_reshape tests/collections/reshape/ptgpp_reshape.local_read_reshape tests/collections/reshape/ptgpp_reshape.remote_no_re_reshape tests/collections/reshape/ptgpp_reshape.remote_read_reshape tests/collections/reshape/remote_no_re_reshape.c tests/collections/reshape/remote_no_re_reshape.h tests/collections/reshape/remote_read_reshape.c tests/collections/reshape/remote_read_reshape.h + +build tests/collections/reshape/CMakeFiles/reshape.dir/testing_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo ../tests/collections/reshape/testing_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/testing_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/common.c.o: C_COMPILER__reshape_RelWithDebInfo ../tests/collections/reshape/common.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/local_no_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/local_no_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/local_no_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/local_read_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/local_read_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/local_read_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/local_output_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/local_output_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/local_output_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/local_input_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/local_input_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/local_input_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/remote_read_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/remote_read_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/remote_read_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/remote_no_re_reshape.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/remote_no_re_reshape.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/remote_no_re_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + +build tests/collections/reshape/CMakeFiles/reshape.dir/local_input_LU_LL.c.o: C_COMPILER__reshape_RelWithDebInfo tests/collections/reshape/local_input_LU_LL.c || cmake_object_order_depends_target_reshape + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/reshape.dir/local_input_LU_LL.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_PDB = tests/collections/reshape/reshape.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target reshape + + +############################################# +# Link the executable tests/collections/reshape/reshape + +build tests/collections/reshape/reshape: C_EXECUTABLE_LINKER__reshape_RelWithDebInfo tests/collections/reshape/CMakeFiles/reshape.dir/testing_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/common.c.o tests/collections/reshape/CMakeFiles/reshape.dir/local_no_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/local_read_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/local_output_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/local_input_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/remote_read_reshape.c.o tests/collections/reshape/CMakeFiles/reshape.dir/remote_no_re_reshape.c.o 
tests/collections/reshape/CMakeFiles/reshape.dir/local_input_LU_LL.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/reshape/ptgpp_reshape.local_input_LU_LL tests/collections/reshape/ptgpp_reshape.local_input_reshape tests/collections/reshape/ptgpp_reshape.local_no_reshape tests/collections/reshape/ptgpp_reshape.local_output_reshape tests/collections/reshape/ptgpp_reshape.local_read_reshape tests/collections/reshape/ptgpp_reshape.remote_no_re_reshape tests/collections/reshape/ptgpp_reshape.remote_read_reshape parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/reshape/CMakeFiles/reshape.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/reshape.dir/ + TARGET_FILE = tests/collections/reshape/reshape + TARGET_PDB = tests/collections/reshape/reshape.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target remote_multiple_outs_same_pred_flow + + +############################################# +# Order-only phony target for remote_multiple_outs_same_pred_flow + +build cmake_object_order_depends_target_remote_multiple_outs_same_pred_flow: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow 
tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps tests/collections/reshape/remote_multiple_outs_same_pred_flow.c tests/collections/reshape/remote_multiple_outs_same_pred_flow.h tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.c tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.h + +build tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/testing_remote_multiple_outs_same_pred_flow.c.o: C_COMPILER__remote_multiple_outs_same_pred_flow_RelWithDebInfo ../tests/collections/reshape/testing_remote_multiple_outs_same_pred_flow.c || cmake_object_order_depends_target_remote_multiple_outs_same_pred_flow + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/testing_remote_multiple_outs_same_pred_flow.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/ + TARGET_PDB = tests/collections/reshape/remote_multiple_outs_same_pred_flow.pdb + +build tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/common.c.o: C_COMPILER__remote_multiple_outs_same_pred_flow_RelWithDebInfo ../tests/collections/reshape/common.c || cmake_object_order_depends_target_remote_multiple_outs_same_pred_flow + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/ + TARGET_PDB = tests/collections/reshape/remote_multiple_outs_same_pred_flow.pdb + +build tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow.c.o: C_COMPILER__remote_multiple_outs_same_pred_flow_RelWithDebInfo tests/collections/reshape/remote_multiple_outs_same_pred_flow.c || cmake_object_order_depends_target_remote_multiple_outs_same_pred_flow + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = 
-Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/ + TARGET_PDB = tests/collections/reshape/remote_multiple_outs_same_pred_flow.pdb + +build tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow_multiple_deps.c.o: C_COMPILER__remote_multiple_outs_same_pred_flow_RelWithDebInfo tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.c || cmake_object_order_depends_target_remote_multiple_outs_same_pred_flow + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow_multiple_deps.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/ + TARGET_PDB = tests/collections/reshape/remote_multiple_outs_same_pred_flow.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target remote_multiple_outs_same_pred_flow + + +############################################# +# Link the executable tests/collections/reshape/remote_multiple_outs_same_pred_flow + +build tests/collections/reshape/remote_multiple_outs_same_pred_flow: C_EXECUTABLE_LINKER__remote_multiple_outs_same_pred_flow_RelWithDebInfo tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/testing_remote_multiple_outs_same_pred_flow.c.o tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/common.c.o tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow.c.o tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/remote_multiple_outs_same_pred_flow_multiple_deps.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 
-L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/remote_multiple_outs_same_pred_flow.dir/ + TARGET_FILE = tests/collections/reshape/remote_multiple_outs_same_pred_flow + TARGET_PDB = tests/collections/reshape/remote_multiple_outs_same_pred_flow.pdb + + +############################################# +# Utility command for ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow + +build tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow: phony tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow tests/collections/reshape/remote_multiple_outs_same_pred_flow.h tests/collections/reshape/remote_multiple_outs_same_pred_flow.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for edit_cache + +build tests/collections/reshape/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build tests/collections/reshape/edit_cache: phony tests/collections/reshape/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/collections/reshape/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/collections/reshape/all + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/collections/reshape/install/strip: phony tests/collections/reshape/CMakeFiles/install/strip.util + +# ============================================================================= +# Object build statements for EXECUTABLE target input_dep_reshape_single_copy + + +############################################# +# Order-only phony target for input_dep_reshape_single_copy + +build cmake_object_order_depends_target_input_dep_reshape_single_copy: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/collections/reshape/input_dep_single_copy_reshape.c tests/collections/reshape/input_dep_single_copy_reshape.h tests/collections/reshape/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape + +build tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/testing_input_dep_reshape_single_copy.c.o: C_COMPILER__input_dep_reshape_single_copy_RelWithDebInfo ../tests/collections/reshape/testing_input_dep_reshape_single_copy.c || cmake_object_order_depends_target_input_dep_reshape_single_copy + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/testing_input_dep_reshape_single_copy.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/ + TARGET_PDB = tests/collections/reshape/input_dep_reshape_single_copy.pdb + +build tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/common.c.o: C_COMPILER__input_dep_reshape_single_copy_RelWithDebInfo ../tests/collections/reshape/common.c || cmake_object_order_depends_target_input_dep_reshape_single_copy + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/common.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/ + TARGET_PDB = tests/collections/reshape/input_dep_reshape_single_copy.pdb + +build tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/input_dep_single_copy_reshape.c.o: C_COMPILER__input_dep_reshape_single_copy_RelWithDebInfo tests/collections/reshape/input_dep_single_copy_reshape.c || cmake_object_order_depends_target_input_dep_reshape_single_copy + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/input_dep_single_copy_reshape.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/collections/reshape -Iparsec/include/fortran + OBJECT_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + OBJECT_FILE_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/ + TARGET_PDB = tests/collections/reshape/input_dep_reshape_single_copy.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target input_dep_reshape_single_copy + + +############################################# +# Link the executable tests/collections/reshape/input_dep_reshape_single_copy + +build tests/collections/reshape/input_dep_reshape_single_copy: C_EXECUTABLE_LINKER__input_dep_reshape_single_copy_RelWithDebInfo tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/testing_input_dep_reshape_single_copy.c.o tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/common.c.o tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/input_dep_single_copy_reshape.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/collections/reshape/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir + POST_BUILD = : + 
PRE_LINK = : + TARGET_COMPILE_PDB = tests/collections/reshape/CMakeFiles/input_dep_reshape_single_copy.dir/ + TARGET_FILE = tests/collections/reshape/input_dep_reshape_single_copy + TARGET_PDB = tests/collections/reshape/input_dep_reshape_single_copy.pdb + + +############################################# +# Utility command for ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps + +build tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps: phony tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.h tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_reshape.local_input_LU_LL + +build tests/collections/reshape/ptgpp_reshape.local_input_LU_LL: phony tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_LU_LL tests/collections/reshape/local_input_LU_LL.h tests/collections/reshape/local_input_LU_LL.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_avoidable_reshape.avoidable_reshape + +build tests/collections/reshape/ptgpp_avoidable_reshape.avoidable_reshape: phony tests/collections/reshape/CMakeFiles/ptgpp_avoidable_reshape.avoidable_reshape tests/collections/reshape/avoidable_reshape.h tests/collections/reshape/avoidable_reshape.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build tests/collections/reshape/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config 
./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/collections/reshape/package_source: phony tests/collections/reshape/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tests/collections/reshape/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/collections/reshape/test: phony tests/collections/reshape/CMakeFiles/test.util + + +############################################# +# Utility command for list_install_components + +build tests/collections/reshape/list_install_components: phony + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape: phony tests/collections/reshape/input_dep_single_copy_reshape.h tests/collections/reshape/input_dep_single_copy_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/input_dep_single_copy_reshape.h + +build tests/collections/reshape/input_dep_single_copy_reshape.h tests/collections/reshape/input_dep_single_copy_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/input_dep_single_copy_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/input_dep_single_copy_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd 
/home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/input_dep_single_copy_reshape.jdf -C input_dep_single_copy_reshape.c -H input_dep_single_copy_reshape.h -f input_dep_single_copy_reshape + DESC = Generating input_dep_single_copy_reshape.h, input_dep_single_copy_reshape.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_no_re_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_no_re_reshape: phony tests/collections/reshape/remote_no_re_reshape.h tests/collections/reshape/remote_no_re_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/remote_no_re_reshape.h + +build tests/collections/reshape/remote_no_re_reshape.h tests/collections/reshape/remote_no_re_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/remote_no_re_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/remote_no_re_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/remote_no_re_reshape.jdf -C remote_no_re_reshape.c -H remote_no_re_reshape.h -f remote_no_re_reshape + DESC = Generating remote_no_re_reshape.h, remote_no_re_reshape.c + restat = 1 + + +############################################# +# Custom command for tests/collections/reshape/avoidable_reshape.h + +build tests/collections/reshape/avoidable_reshape.h tests/collections/reshape/avoidable_reshape.c: CUSTOM_COMMAND 
../tests/collections/reshape/avoidable_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/avoidable_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/avoidable_reshape.jdf -C avoidable_reshape.c -H avoidable_reshape.h -f avoidable_reshape + DESC = Generating avoidable_reshape.h, avoidable_reshape.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_reshape: phony tests/collections/reshape/local_input_reshape.h tests/collections/reshape/local_input_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/local_input_reshape.h + +build tests/collections/reshape/local_input_reshape.h tests/collections/reshape/local_input_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/local_input_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/local_input_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/local_input_reshape.jdf -C local_input_reshape.c -H local_input_reshape.h -f local_input_reshape + DESC = Generating local_input_reshape.h, local_input_reshape.c + restat = 1 + + +############################################# +# Phony custom command for 
tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_output_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_output_reshape: phony tests/collections/reshape/local_output_reshape.h tests/collections/reshape/local_output_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/local_output_reshape.h + +build tests/collections/reshape/local_output_reshape.h tests/collections/reshape/local_output_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/local_output_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/local_output_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/local_output_reshape.jdf -C local_output_reshape.c -H local_output_reshape.h -f local_output_reshape + DESC = Generating local_output_reshape.h, local_output_reshape.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_read_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.remote_read_reshape: phony tests/collections/reshape/remote_read_reshape.h tests/collections/reshape/remote_read_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/remote_read_reshape.h + +build tests/collections/reshape/remote_read_reshape.h tests/collections/reshape/remote_read_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/remote_read_reshape.jdf 
parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/remote_read_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/remote_read_reshape.jdf -C remote_read_reshape.c -H remote_read_reshape.h -f remote_read_reshape + DESC = Generating remote_read_reshape.h, remote_read_reshape.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_read_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_read_reshape: phony tests/collections/reshape/local_read_reshape.h tests/collections/reshape/local_read_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/local_read_reshape.h + +build tests/collections/reshape/local_read_reshape.h tests/collections/reshape/local_read_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/local_read_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/local_read_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/local_read_reshape.jdf -C local_read_reshape.c -H local_read_reshape.h -f local_read_reshape + DESC = Generating local_read_reshape.h, local_read_reshape.c + restat = 1 + + +############################################# +# Phony custom command for 
tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_no_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_no_reshape: phony tests/collections/reshape/local_no_reshape.h tests/collections/reshape/local_no_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/collections/reshape/local_no_reshape.h + +build tests/collections/reshape/local_no_reshape.h tests/collections/reshape/local_no_reshape.c: CUSTOM_COMMAND ../tests/collections/reshape/local_no_reshape.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/local_no_reshape.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/local_no_reshape.jdf -C local_no_reshape.c -H local_no_reshape.h -f local_no_reshape + DESC = Generating local_no_reshape.h, local_no_reshape.c + restat = 1 + + +############################################# +# Custom command for tests/collections/reshape/local_input_LU_LL.h + +build tests/collections/reshape/local_input_LU_LL.h tests/collections/reshape/local_input_LU_LL.c: CUSTOM_COMMAND ../tests/collections/reshape/local_input_LU_LL.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/local_input_LU_LL.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/local_input_LU_LL.jdf -C local_input_LU_LL.c -H local_input_LU_LL.h -f local_input_LU_LL + DESC = Generating local_input_LU_LL.h, 
local_input_LU_LL.c + restat = 1 + + +############################################# +# Custom command for tests/collections/reshape/remote_multiple_outs_same_pred_flow.h + +build tests/collections/reshape/remote_multiple_outs_same_pred_flow.h tests/collections/reshape/remote_multiple_outs_same_pred_flow.c: CUSTOM_COMMAND ../tests/collections/reshape/remote_multiple_outs_same_pred_flow.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/remote_multiple_outs_same_pred_flow.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/remote_multiple_outs_same_pred_flow.jdf -C remote_multiple_outs_same_pred_flow.c -H remote_multiple_outs_same_pred_flow.h -f remote_multiple_outs_same_pred_flow + DESC = Generating remote_multiple_outs_same_pred_flow.h, remote_multiple_outs_same_pred_flow.c + restat = 1 + + +############################################# +# Custom command for tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.h + +build tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.h tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.c: CUSTOM_COMMAND ../tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/collections/reshape && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.jdf -C 
remote_multiple_outs_same_pred_flow_multiple_deps.c -H remote_multiple_outs_same_pred_flow_multiple_deps.h -f remote_multiple_outs_same_pred_flow_multiple_deps + DESC = Generating remote_multiple_outs_same_pred_flow_multiple_deps.h, remote_multiple_outs_same_pred_flow_multiple_deps.c + restat = 1 + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow + +build tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow: phony tests/collections/reshape/remote_multiple_outs_same_pred_flow.h tests/collections/reshape/remote_multiple_outs_same_pred_flow.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps + +build tests/collections/reshape/CMakeFiles/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps: phony tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.h tests/collections/reshape/remote_multiple_outs_same_pred_flow_multiple_deps.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_LU_LL + +build tests/collections/reshape/CMakeFiles/ptgpp_reshape.local_input_LU_LL: phony tests/collections/reshape/local_input_LU_LL.h tests/collections/reshape/local_input_LU_LL.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for 
tests/collections/reshape/CMakeFiles/ptgpp_avoidable_reshape.avoidable_reshape + +build tests/collections/reshape/CMakeFiles/ptgpp_avoidable_reshape.avoidable_reshape: phony tests/collections/reshape/avoidable_reshape.h tests/collections/reshape/avoidable_reshape.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build tests/dsl/dtd/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/dsl/dtd/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/dsl/dtd/install/strip: phony tests/dsl/dtd/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install + +build tests/dsl/dtd/CMakeFiles/install.util: CUSTOM_COMMAND tests/dsl/dtd/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/dsl/dtd/install: phony tests/dsl/dtd/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tests/dsl/dtd/list_install_components: phony + + +############################################# +# Utility command for edit_cache + +build tests/dsl/dtd/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/dsl/dtd/edit_cache: phony tests/dsl/dtd/CMakeFiles/edit_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_flag_dont_track + + +############################################# +# Order-only phony target for dtd_test_flag_dont_track + +build cmake_object_order_depends_target_dtd_test_flag_dont_track: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir/dtd_test_flag_dont_track.c.o: C_COMPILER__dtd_test_flag_dont_track_RelWithDebInfo ../tests/dsl/dtd/dtd_test_flag_dont_track.c || cmake_object_order_depends_target_dtd_test_flag_dont_track + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir/dtd_test_flag_dont_track.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_flag_dont_track.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_flag_dont_track + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_flag_dont_track + +build tests/dsl/dtd/dtd_test_flag_dont_track: C_EXECUTABLE_LINKER__dtd_test_flag_dont_track_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir/dtd_test_flag_dont_track.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_flag_dont_track.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_flag_dont_track + TARGET_PDB = tests/dsl/dtd/dtd_test_flag_dont_track.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_multiple_handle_wait + + +############################################# +# Order-only phony target for 
dtd_test_multiple_handle_wait + +build cmake_object_order_depends_target_dtd_test_multiple_handle_wait: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir/dtd_test_multiple_handle_wait.c.o: C_COMPILER__dtd_test_multiple_handle_wait_RelWithDebInfo ../tests/dsl/dtd/dtd_test_multiple_handle_wait.c || cmake_object_order_depends_target_dtd_test_multiple_handle_wait + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir/dtd_test_multiple_handle_wait.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_multiple_handle_wait.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_multiple_handle_wait + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_multiple_handle_wait + +build tests/dsl/dtd/dtd_test_multiple_handle_wait: C_EXECUTABLE_LINKER__dtd_test_multiple_handle_wait_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir/dtd_test_multiple_handle_wait.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl 
-L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_multiple_handle_wait.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_multiple_handle_wait + TARGET_PDB = tests/dsl/dtd/dtd_test_multiple_handle_wait.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_task_generation + + +############################################# +# Order-only phony target for dtd_test_task_generation + +build cmake_object_order_depends_target_dtd_test_task_generation: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir/dtd_test_task_generation.c.o: C_COMPILER__dtd_test_task_generation_RelWithDebInfo ../tests/dsl/dtd/dtd_test_task_generation.c || cmake_object_order_depends_target_dtd_test_task_generation + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir/dtd_test_task_generation.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_task_generation.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_task_generation + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_task_generation + +build tests/dsl/dtd/dtd_test_task_generation: C_EXECUTABLE_LINKER__dtd_test_task_generation_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir/dtd_test_task_generation.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_generation.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_task_generation + TARGET_PDB = tests/dsl/dtd/dtd_test_task_generation.pdb + + +############################################# +# Utility command for test + +build tests/dsl/dtd/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/dsl/dtd/test: phony tests/dsl/dtd/CMakeFiles/test.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_task_insertion + + +############################################# +# Order-only phony target for dtd_test_task_insertion + +build cmake_object_order_depends_target_dtd_test_task_insertion: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir/dtd_test_task_insertion.c.o: C_COMPILER__dtd_test_task_insertion_RelWithDebInfo ../tests/dsl/dtd/dtd_test_task_insertion.c || cmake_object_order_depends_target_dtd_test_task_insertion + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir/dtd_test_task_insertion.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_task_insertion.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_task_insertion + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_task_insertion + +build tests/dsl/dtd/dtd_test_task_insertion: C_EXECUTABLE_LINKER__dtd_test_task_insertion_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir/dtd_test_task_insertion.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_insertion.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_task_insertion + TARGET_PDB = tests/dsl/dtd/dtd_test_task_insertion.pdb + + +############################################# +# Utility command for install/local + +build tests/dsl/dtd/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/dsl/dtd/all + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/dsl/dtd/install/local: phony tests/dsl/dtd/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_global_id_for_dc_assumed + + +############################################# +# Order-only phony target for dtd_test_global_id_for_dc_assumed + +build cmake_object_order_depends_target_dtd_test_global_id_for_dc_assumed: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir/dtd_test_global_id_for_dc_assumed.c.o: C_COMPILER__dtd_test_global_id_for_dc_assumed_RelWithDebInfo ../tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.c || cmake_object_order_depends_target_dtd_test_global_id_for_dc_assumed + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir/dtd_test_global_id_for_dc_assumed.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_global_id_for_dc_assumed + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_global_id_for_dc_assumed + +build tests/dsl/dtd/dtd_test_global_id_for_dc_assumed: C_EXECUTABLE_LINKER__dtd_test_global_id_for_dc_assumed_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir/dtd_test_global_id_for_dc_assumed.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_global_id_for_dc_assumed.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_global_id_for_dc_assumed + TARGET_PDB = tests/dsl/dtd/dtd_test_global_id_for_dc_assumed.pdb + + +############################################# +# Utility command for rebuild_cache + +build 
tests/dsl/dtd/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/dsl/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/dsl/dtd/rebuild_cache: phony tests/dsl/dtd/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_war + + +############################################# +# Order-only phony target for dtd_test_war + +build cmake_object_order_depends_target_dtd_test_war: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_war.dir/dtd_test_war.c.o: C_COMPILER__dtd_test_war_RelWithDebInfo ../tests/dsl/dtd/dtd_test_war.c || cmake_object_order_depends_target_dtd_test_war + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir/dtd_test_war.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_war.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_war + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_war + +build tests/dsl/dtd/dtd_test_war: C_EXECUTABLE_LINKER__dtd_test_war_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_war.dir/dtd_test_war.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_war.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_war + TARGET_PDB = tests/dsl/dtd/dtd_test_war.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_hierarchy + + +############################################# +# Order-only phony target for dtd_test_hierarchy + +build cmake_object_order_depends_target_dtd_test_hierarchy: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + 
+build tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir/dtd_test_hierarchy.c.o: C_COMPILER__dtd_test_hierarchy_RelWithDebInfo ../tests/dsl/dtd/dtd_test_hierarchy.c || cmake_object_order_depends_target_dtd_test_hierarchy + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir/dtd_test_hierarchy.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_hierarchy.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_hierarchy + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_hierarchy + +build tests/dsl/dtd/dtd_test_hierarchy: C_EXECUTABLE_LINKER__dtd_test_hierarchy_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir/dtd_test_hierarchy.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = 
tests/dsl/dtd/CMakeFiles/dtd_test_hierarchy.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_hierarchy + TARGET_PDB = tests/dsl/dtd/dtd_test_hierarchy.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_null_as_tile + + +############################################# +# Order-only phony target for dtd_test_null_as_tile + +build cmake_object_order_depends_target_dtd_test_null_as_tile: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir/dtd_test_null_as_tile.c.o: C_COMPILER__dtd_test_null_as_tile_RelWithDebInfo ../tests/dsl/dtd/dtd_test_null_as_tile.c || cmake_object_order_depends_target_dtd_test_null_as_tile + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir/dtd_test_null_as_tile.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_null_as_tile.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_null_as_tile + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_null_as_tile + +build tests/dsl/dtd/dtd_test_null_as_tile: C_EXECUTABLE_LINKER__dtd_test_null_as_tile_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir/dtd_test_null_as_tile.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_null_as_tile.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_null_as_tile + TARGET_PDB = tests/dsl/dtd/dtd_test_null_as_tile.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_pingpong + + +############################################# +# Order-only phony target for dtd_test_pingpong + +build 
cmake_object_order_depends_target_dtd_test_pingpong: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir/dtd_test_pingpong.c.o: C_COMPILER__dtd_test_pingpong_RelWithDebInfo ../tests/dsl/dtd/dtd_test_pingpong.c || cmake_object_order_depends_target_dtd_test_pingpong + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir/dtd_test_pingpong.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_pingpong.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_pingpong + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_pingpong + +build tests/dsl/dtd/dtd_test_pingpong: C_EXECUTABLE_LINKER__dtd_test_pingpong_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir/dtd_test_pingpong.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + 
OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_pingpong.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_pingpong + TARGET_PDB = tests/dsl/dtd/dtd_test_pingpong.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_allreduce + + +############################################# +# Order-only phony target for dtd_test_allreduce + +build cmake_object_order_depends_target_dtd_test_allreduce: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir/dtd_test_allreduce.c.o: C_COMPILER__dtd_test_allreduce_RelWithDebInfo ../tests/dsl/dtd/dtd_test_allreduce.c || cmake_object_order_depends_target_dtd_test_allreduce + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir/dtd_test_allreduce.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_allreduce.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_allreduce + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_allreduce + +build tests/dsl/dtd/dtd_test_allreduce: C_EXECUTABLE_LINKER__dtd_test_allreduce_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir/dtd_test_allreduce.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_allreduce.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_allreduce + TARGET_PDB = tests/dsl/dtd/dtd_test_allreduce.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_insert_task_interface + + +############################################# +# Order-only phony target for dtd_test_insert_task_interface + +build 
cmake_object_order_depends_target_dtd_test_insert_task_interface: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir/dtd_test_insert_task_interface.c.o: C_COMPILER__dtd_test_insert_task_interface_RelWithDebInfo ../tests/dsl/dtd/dtd_test_insert_task_interface.c || cmake_object_order_depends_target_dtd_test_insert_task_interface + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir/dtd_test_insert_task_interface.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_insert_task_interface.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_insert_task_interface + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_insert_task_interface + +build tests/dsl/dtd/dtd_test_insert_task_interface: C_EXECUTABLE_LINKER__dtd_test_insert_task_interface_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir/dtd_test_insert_task_interface.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 
-L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_insert_task_interface.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_insert_task_interface + TARGET_PDB = tests/dsl/dtd/dtd_test_insert_task_interface.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_template_counter + + +############################################# +# Order-only phony target for dtd_test_template_counter + +build cmake_object_order_depends_target_dtd_test_template_counter: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir/dtd_test_template_counter.c.o: C_COMPILER__dtd_test_template_counter_RelWithDebInfo ../tests/dsl/dtd/dtd_test_template_counter.c || cmake_object_order_depends_target_dtd_test_template_counter + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir/dtd_test_template_counter.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_template_counter.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_template_counter + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_template_counter + +build tests/dsl/dtd/dtd_test_template_counter: C_EXECUTABLE_LINKER__dtd_test_template_counter_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir/dtd_test_template_counter.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_template_counter.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_template_counter + TARGET_PDB = tests/dsl/dtd/dtd_test_template_counter.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_task_inserting_task + + +############################################# +# Order-only phony 
target for dtd_test_task_inserting_task + +build cmake_object_order_depends_target_dtd_test_task_inserting_task: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir/dtd_test_task_inserting_task.c.o: C_COMPILER__dtd_test_task_inserting_task_RelWithDebInfo ../tests/dsl/dtd/dtd_test_task_inserting_task.c || cmake_object_order_depends_target_dtd_test_task_inserting_task + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir/dtd_test_task_inserting_task.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_task_inserting_task.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_task_inserting_task + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_task_inserting_task + +build tests/dsl/dtd/dtd_test_task_inserting_task: C_EXECUTABLE_LINKER__dtd_test_task_inserting_task_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir/dtd_test_task_inserting_task.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl 
-L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_inserting_task.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_task_inserting_task + TARGET_PDB = tests/dsl/dtd/dtd_test_task_inserting_task.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_untie + + +############################################# +# Order-only phony target for dtd_test_untie + +build cmake_object_order_depends_target_dtd_test_untie: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir/dtd_test_untie.c.o: C_COMPILER__dtd_test_untie_RelWithDebInfo ../tests/dsl/dtd/dtd_test_untie.c || cmake_object_order_depends_target_dtd_test_untie + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir/dtd_test_untie.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_untie.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_untie + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_untie + +build tests/dsl/dtd/dtd_test_untie: C_EXECUTABLE_LINKER__dtd_test_untie_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir/dtd_test_untie.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_untie.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_untie + TARGET_PDB = tests/dsl/dtd/dtd_test_untie.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_task_placement + + +############################################# +# Order-only phony target for dtd_test_task_placement + +build cmake_object_order_depends_target_dtd_test_task_placement: phony || cmake_object_order_depends_target_parsec 
cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir/dtd_test_task_placement.c.o: C_COMPILER__dtd_test_task_placement_RelWithDebInfo ../tests/dsl/dtd/dtd_test_task_placement.c || cmake_object_order_depends_target_dtd_test_task_placement + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir/dtd_test_task_placement.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_task_placement.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_task_placement + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_task_placement + +build tests/dsl/dtd/dtd_test_task_placement: C_EXECUTABLE_LINKER__dtd_test_task_placement_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir/dtd_test_task_placement.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + 
OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_task_placement.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_task_placement + TARGET_PDB = tests/dsl/dtd/dtd_test_task_placement.pdb + + +############################################# +# Utility command for package_source + +build tests/dsl/dtd/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/dsl/dtd/package_source: phony tests/dsl/dtd/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_data_flush + + +############################################# +# Order-only phony target for dtd_test_data_flush + +build cmake_object_order_depends_target_dtd_test_data_flush: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir/dtd_test_data_flush.c.o: C_COMPILER__dtd_test_data_flush_RelWithDebInfo ../tests/dsl/dtd/dtd_test_data_flush.c || cmake_object_order_depends_target_dtd_test_data_flush + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir/dtd_test_data_flush.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_data_flush.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_data_flush + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_data_flush + +build tests/dsl/dtd/dtd_test_data_flush: C_EXECUTABLE_LINKER__dtd_test_data_flush_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir/dtd_test_data_flush.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_data_flush.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_data_flush + TARGET_PDB = tests/dsl/dtd/dtd_test_data_flush.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_broadcast + + +############################################# +# Order-only phony target for dtd_test_broadcast + +build cmake_object_order_depends_target_dtd_test_broadcast: 
phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir/dtd_test_broadcast.c.o: C_COMPILER__dtd_test_broadcast_RelWithDebInfo ../tests/dsl/dtd/dtd_test_broadcast.c || cmake_object_order_depends_target_dtd_test_broadcast + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir/dtd_test_broadcast.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_broadcast.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_broadcast + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_broadcast + +build tests/dsl/dtd/dtd_test_broadcast: C_EXECUTABLE_LINKER__dtd_test_broadcast_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir/dtd_test_broadcast.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = 
tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_broadcast.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_broadcast + TARGET_PDB = tests/dsl/dtd/dtd_test_broadcast.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_explicit_task_creation + + +############################################# +# Order-only phony target for dtd_test_explicit_task_creation + +build cmake_object_order_depends_target_dtd_test_explicit_task_creation: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir/dtd_test_explicit_task_creation.c.o: C_COMPILER__dtd_test_explicit_task_creation_RelWithDebInfo ../tests/dsl/dtd/dtd_test_explicit_task_creation.c || cmake_object_order_depends_target_dtd_test_explicit_task_creation + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir/dtd_test_explicit_task_creation.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_explicit_task_creation.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_explicit_task_creation + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_explicit_task_creation + +build tests/dsl/dtd/dtd_test_explicit_task_creation: C_EXECUTABLE_LINKER__dtd_test_explicit_task_creation_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir/dtd_test_explicit_task_creation.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_explicit_task_creation.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_explicit_task_creation + TARGET_PDB = tests/dsl/dtd/dtd_test_explicit_task_creation.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_reduce + 
+ +############################################# +# Order-only phony target for dtd_test_reduce + +build cmake_object_order_depends_target_dtd_test_reduce: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir/dtd_test_reduce.c.o: C_COMPILER__dtd_test_reduce_RelWithDebInfo ../tests/dsl/dtd/dtd_test_reduce.c || cmake_object_order_depends_target_dtd_test_reduce + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir/dtd_test_reduce.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_reduce.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_reduce + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_reduce + +build tests/dsl/dtd/dtd_test_reduce: C_EXECUTABLE_LINKER__dtd_test_reduce_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir/dtd_test_reduce.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic 
/usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_reduce.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_reduce + TARGET_PDB = tests/dsl/dtd/dtd_test_reduce.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_tp_enqueue_dequeue + + +############################################# +# Order-only phony target for dtd_test_tp_enqueue_dequeue + +build cmake_object_order_depends_target_dtd_test_tp_enqueue_dequeue: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir/dtd_test_tp_enqueue_dequeue.c.o: C_COMPILER__dtd_test_tp_enqueue_dequeue_RelWithDebInfo ../tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.c || cmake_object_order_depends_target_dtd_test_tp_enqueue_dequeue + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir/dtd_test_tp_enqueue_dequeue.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_tp_enqueue_dequeue + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_tp_enqueue_dequeue + +build tests/dsl/dtd/dtd_test_tp_enqueue_dequeue: C_EXECUTABLE_LINKER__dtd_test_tp_enqueue_dequeue_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir/dtd_test_tp_enqueue_dequeue.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_tp_enqueue_dequeue.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_tp_enqueue_dequeue + TARGET_PDB = tests/dsl/dtd/dtd_test_tp_enqueue_dequeue.pdb + + +############################################# +# Utility command for package + +build tests/dsl/dtd/CMakeFiles/package.util: CUSTOM_COMMAND tests/dsl/dtd/all + COMMAND = cd /home/joseph/parsec/parsec 
&& /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/dsl/dtd/package: phony tests/dsl/dtd/CMakeFiles/package.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_test_interleave_actions + + +############################################# +# Order-only phony target for dtd_test_interleave_actions + +build cmake_object_order_depends_target_dtd_test_interleave_actions: phony || cmake_object_order_depends_target_parsec cmake_object_order_depends_target_tests_common + +build tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir/dtd_test_interleave_actions.c.o: C_COMPILER__dtd_test_interleave_actions_RelWithDebInfo ../tests/dsl/dtd/dtd_test_interleave_actions.c || cmake_object_order_depends_target_dtd_test_interleave_actions + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir/dtd_test_interleave_actions.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir + OBJECT_FILE_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir/ + TARGET_PDB = tests/dsl/dtd/dtd_test_interleave_actions.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_test_interleave_actions + + +############################################# +# Link the executable tests/dsl/dtd/dtd_test_interleave_actions + +build tests/dsl/dtd/dtd_test_interleave_actions: C_EXECUTABLE_LINKER__dtd_test_interleave_actions_RelWithDebInfo tests/CMakeFiles/tests_common.dir/tests_data.c.o tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir/dtd_test_interleave_actions.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so tests/tests_common parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/dsl/dtd/CMakeFiles/dtd_test_interleave_actions.dir/ + TARGET_FILE = tests/dsl/dtd/dtd_test_interleave_actions + TARGET_PDB = tests/dsl/dtd/dtd_test_interleave_actions.pdb + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/CMakeLists.txt +# 
============================================================================= + + +############################################# +# Utility command for install/strip + +build tests/apps/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/apps/install/strip: phony tests/apps/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build tests/apps/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/apps/install/local: phony tests/apps/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tests/apps/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/apps/install: phony tests/apps/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/apps/rebuild_cache: phony tests/apps/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tests/apps/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/edit_cache: phony tests/apps/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tests/apps/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/apps/package_source: phony tests/apps/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build tests/apps/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/test: phony tests/apps/CMakeFiles/test.util + + +############################################# +# Utility command for package + +build tests/apps/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/apps/package: phony tests/apps/CMakeFiles/package.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/apps/pingpong/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/pingpong/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/apps/pingpong/install/local: phony tests/apps/pingpong/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build tests/apps/pingpong/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/pingpong/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/apps/pingpong/install: phony tests/apps/pingpong/CMakeFiles/install.util + + +############################################# +# Utility command for ptgpp_rtt.rtt + +build tests/apps/pingpong/ptgpp_rtt.rtt: phony tests/apps/pingpong/CMakeFiles/ptgpp_rtt.rtt tests/apps/pingpong/rtt.h tests/apps/pingpong/rtt.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target bw_test + + +############################################# +# Order-only phony target for bw_test + +build cmake_object_order_depends_target_bw_test: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/pingpong/bandwidth.c tests/apps/pingpong/bandwidth.h tests/apps/pingpong/ptgpp_bw_test.bandwidth + +build tests/apps/pingpong/CMakeFiles/bw_test.dir/bandwidth.c.o: C_COMPILER__bw_test_RelWithDebInfo tests/apps/pingpong/bandwidth.c || cmake_object_order_depends_target_bw_test + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/pingpong/CMakeFiles/bw_test.dir/bandwidth.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/pingpong -Iparsec/include/fortran + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/bw_test.dir + OBJECT_FILE_DIR = tests/apps/pingpong/CMakeFiles/bw_test.dir + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/bw_test.dir/ + TARGET_PDB = tests/apps/pingpong/bw_test.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target bw_test + + +############################################# +# Link the executable tests/apps/pingpong/bw_test + +build tests/apps/pingpong/bw_test: C_EXECUTABLE_LINKER__bw_test_RelWithDebInfo tests/apps/pingpong/CMakeFiles/bw_test.dir/bandwidth.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/pingpong/ptgpp_bw_test.bandwidth parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/bw_test.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/bw_test.dir/ + TARGET_FILE = tests/apps/pingpong/bw_test + TARGET_PDB = tests/apps/pingpong/bw_test.pdb + + +############################################# +# Utility command for package + +build tests/apps/pingpong/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/pingpong/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/apps/pingpong/package: phony tests/apps/pingpong/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/apps/pingpong/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/pingpong/test: phony tests/apps/pingpong/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/apps/pingpong/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/apps/pingpong/package_source: phony tests/apps/pingpong/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target rtt + + +############################################# +# Order-only phony target for rtt + +build cmake_object_order_depends_target_rtt: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/pingpong/ptgpp_rtt.rtt tests/apps/pingpong/rtt.c tests/apps/pingpong/rtt.h + +build tests/apps/pingpong/CMakeFiles/rtt.dir/main.c.o: C_COMPILER__rtt_RelWithDebInfo ../tests/apps/pingpong/main.c || cmake_object_order_depends_target_rtt + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/pingpong/CMakeFiles/rtt.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/pingpong -Iparsec/include/fortran + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + OBJECT_FILE_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/rtt.dir/ + TARGET_PDB = tests/apps/pingpong/rtt.pdb + +build tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_wrapper.c.o: C_COMPILER__rtt_RelWithDebInfo ../tests/apps/pingpong/rtt_wrapper.c || cmake_object_order_depends_target_rtt + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/pingpong -Iparsec/include/fortran + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + OBJECT_FILE_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/rtt.dir/ + TARGET_PDB = tests/apps/pingpong/rtt.pdb + +build tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_data.c.o: C_COMPILER__rtt_RelWithDebInfo ../tests/apps/pingpong/rtt_data.c || cmake_object_order_depends_target_rtt + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/pingpong -Iparsec/include/fortran + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + OBJECT_FILE_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/rtt.dir/ + TARGET_PDB = tests/apps/pingpong/rtt.pdb + +build tests/apps/pingpong/CMakeFiles/rtt.dir/rtt.c.o: C_COMPILER__rtt_RelWithDebInfo tests/apps/pingpong/rtt.c || cmake_object_order_depends_target_rtt + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/pingpong/CMakeFiles/rtt.dir/rtt.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/pingpong -Iparsec/include/fortran + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + OBJECT_FILE_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/rtt.dir/ + TARGET_PDB = tests/apps/pingpong/rtt.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target rtt + + +############################################# +# Link the executable tests/apps/pingpong/rtt + +build tests/apps/pingpong/rtt: C_EXECUTABLE_LINKER__rtt_RelWithDebInfo tests/apps/pingpong/CMakeFiles/rtt.dir/main.c.o tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_wrapper.c.o tests/apps/pingpong/CMakeFiles/rtt.dir/rtt_data.c.o tests/apps/pingpong/CMakeFiles/rtt.dir/rtt.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/pingpong/ptgpp_rtt.rtt parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/pingpong/CMakeFiles/rtt.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/pingpong/CMakeFiles/rtt.dir/ + TARGET_FILE = tests/apps/pingpong/rtt + TARGET_PDB = tests/apps/pingpong/rtt.pdb + + +############################################# +# Utility command for edit_cache + +build tests/apps/pingpong/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/pingpong/edit_cache: phony tests/apps/pingpong/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/pingpong/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/apps/pingpong/rebuild_cache: phony tests/apps/pingpong/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/apps/pingpong/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/pingpong/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/apps/pingpong/install/strip: phony tests/apps/pingpong/CMakeFiles/install/strip.util + + +############################################# +# Utility command for ptgpp_bw_test.bandwidth + +build tests/apps/pingpong/ptgpp_bw_test.bandwidth: phony tests/apps/pingpong/CMakeFiles/ptgpp_bw_test.bandwidth tests/apps/pingpong/bandwidth.h tests/apps/pingpong/bandwidth.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for list_install_components + +build tests/apps/pingpong/list_install_components: phony + + +############################################# +# Phony custom command for tests/apps/pingpong/CMakeFiles/ptgpp_rtt.rtt + +build tests/apps/pingpong/CMakeFiles/ptgpp_rtt.rtt: phony tests/apps/pingpong/rtt.h tests/apps/pingpong/rtt.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/apps/pingpong/rtt.h + +build tests/apps/pingpong/rtt.h tests/apps/pingpong/rtt.c: CUSTOM_COMMAND ../tests/apps/pingpong/rtt.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/pingpong/rtt.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/pingpong/rtt.jdf -C rtt.c -H rtt.h -f rtt + DESC = Generating rtt.h, rtt.c + restat = 1 + + +############################################# +# Custom command for tests/apps/pingpong/bandwidth.h + +build tests/apps/pingpong/bandwidth.h tests/apps/pingpong/bandwidth.c: CUSTOM_COMMAND ../tests/apps/pingpong/bandwidth.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/pingpong/bandwidth.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a 
parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/pingpong && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp --Wremoteref -E -i /home/joseph/parsec/tests/apps/pingpong/bandwidth.jdf -C bandwidth.c -H bandwidth.h -f bandwidth + DESC = Generating bandwidth.h, bandwidth.c + restat = 1 + + +############################################# +# Phony custom command for tests/apps/pingpong/CMakeFiles/ptgpp_bw_test.bandwidth + +build tests/apps/pingpong/CMakeFiles/ptgpp_bw_test.bandwidth: phony tests/apps/pingpong/bandwidth.h tests/apps/pingpong/bandwidth.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build statements for EXECUTABLE target a2a + + +############################################# +# Order-only phony target for a2a + +build cmake_object_order_depends_target_a2a: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/all2all/a2a.c tests/apps/all2all/a2a.h tests/apps/all2all/ptgpp_a2a.a2a + +build tests/apps/all2all/CMakeFiles/a2a.dir/main.c.o: C_COMPILER__a2a_RelWithDebInfo ../tests/apps/all2all/main.c || cmake_object_order_depends_target_a2a + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/all2all/CMakeFiles/a2a.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/all2all -Iparsec/include/fortran + OBJECT_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + OBJECT_FILE_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + TARGET_COMPILE_PDB = tests/apps/all2all/CMakeFiles/a2a.dir/ + TARGET_PDB = tests/apps/all2all/a2a.pdb + +build tests/apps/all2all/CMakeFiles/a2a.dir/a2a_data.c.o: C_COMPILER__a2a_RelWithDebInfo ../tests/apps/all2all/a2a_data.c || cmake_object_order_depends_target_a2a + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/all2all/CMakeFiles/a2a.dir/a2a_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/all2all -Iparsec/include/fortran + OBJECT_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + OBJECT_FILE_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + TARGET_COMPILE_PDB = tests/apps/all2all/CMakeFiles/a2a.dir/ + TARGET_PDB = tests/apps/all2all/a2a.pdb + +build tests/apps/all2all/CMakeFiles/a2a.dir/a2a.c.o: C_COMPILER__a2a_RelWithDebInfo tests/apps/all2all/a2a.c || cmake_object_order_depends_target_a2a + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/all2all/CMakeFiles/a2a.dir/a2a.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/all2all -Iparsec/include/fortran + OBJECT_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + OBJECT_FILE_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + TARGET_COMPILE_PDB = tests/apps/all2all/CMakeFiles/a2a.dir/ + TARGET_PDB = tests/apps/all2all/a2a.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target a2a + + +############################################# +# Link the executable tests/apps/all2all/a2a + +build tests/apps/all2all/a2a: C_EXECUTABLE_LINKER__a2a_RelWithDebInfo tests/apps/all2all/CMakeFiles/a2a.dir/main.c.o tests/apps/all2all/CMakeFiles/a2a.dir/a2a_data.c.o tests/apps/all2all/CMakeFiles/a2a.dir/a2a.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/all2all/ptgpp_a2a.a2a parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/all2all/CMakeFiles/a2a.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/all2all/CMakeFiles/a2a.dir/ + TARGET_FILE = tests/apps/all2all/a2a + TARGET_PDB = tests/apps/all2all/a2a.pdb + + +############################################# +# Utility command for package + +build tests/apps/all2all/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/all2all/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/apps/all2all/package: phony tests/apps/all2all/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/apps/all2all/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/all2all/test: phony tests/apps/all2all/CMakeFiles/test.util + + +############################################# +# Utility command for ptgpp_a2a.a2a + +build tests/apps/all2all/ptgpp_a2a.a2a: phony tests/apps/all2all/CMakeFiles/ptgpp_a2a.a2a tests/apps/all2all/a2a.h tests/apps/all2all/a2a.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for edit_cache + +build tests/apps/all2all/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/all2all/edit_cache: phony tests/apps/all2all/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package_source + +build tests/apps/all2all/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/apps/all2all/package_source: phony tests/apps/all2all/CMakeFiles/package_source.util + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/all2all/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/apps/all2all/rebuild_cache: phony tests/apps/all2all/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/apps/all2all/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/all2all/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/apps/all2all/install/strip: phony tests/apps/all2all/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/all2all/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/apps/all2all/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/all2all/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build tests/apps/all2all/install: phony tests/apps/all2all/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/apps/all2all/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/all2all/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build tests/apps/all2all/install/local: phony tests/apps/all2all/CMakeFiles/install/local.util + + +############################################# +# Custom command for tests/apps/all2all/a2a.h + +build tests/apps/all2all/a2a.h tests/apps/all2all/a2a.c: CUSTOM_COMMAND ../tests/apps/all2all/a2a.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/all2all/a2a.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/all2all && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/all2all/a2a.jdf -C a2a.c -H a2a.h -f a2a + DESC = Generating a2a.h, a2a.c + restat = 1 + + +############################################# +# Phony custom command for tests/apps/all2all/CMakeFiles/ptgpp_a2a.a2a + +build tests/apps/all2all/CMakeFiles/ptgpp_a2a.a2a: phony tests/apps/all2all/a2a.h tests/apps/all2all/a2a.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build 
statements for EXECUTABLE target BT_reduction + + +############################################# +# Order-only phony target for BT_reduction + +build cmake_object_order_depends_target_BT_reduction: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/generalized_reduction/BT_reduction.c tests/apps/generalized_reduction/BT_reduction.h tests/apps/generalized_reduction/ptgpp_BT_reduction.BT_reduction + +build tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/main.c.o: C_COMPILER__BT_reduction_RelWithDebInfo ../tests/apps/generalized_reduction/main.c || cmake_object_order_depends_target_BT_reduction + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/generalized_reduction -Iparsec/include/fortran + OBJECT_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + OBJECT_FILE_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + TARGET_COMPILE_PDB = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/ + TARGET_PDB = tests/apps/generalized_reduction/BT_reduction.pdb + +build tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction_wrapper.c.o: C_COMPILER__BT_reduction_RelWithDebInfo ../tests/apps/generalized_reduction/BT_reduction_wrapper.c || cmake_object_order_depends_target_BT_reduction + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/generalized_reduction -Iparsec/include/fortran + OBJECT_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + OBJECT_FILE_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + TARGET_COMPILE_PDB = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/ + TARGET_PDB = tests/apps/generalized_reduction/BT_reduction.pdb + +build tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/reduc_data.c.o: C_COMPILER__BT_reduction_RelWithDebInfo ../tests/apps/generalized_reduction/reduc_data.c || cmake_object_order_depends_target_BT_reduction + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/reduc_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/generalized_reduction -Iparsec/include/fortran + OBJECT_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + OBJECT_FILE_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + TARGET_COMPILE_PDB = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/ + TARGET_PDB = tests/apps/generalized_reduction/BT_reduction.pdb + +build tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction.c.o: C_COMPILER__BT_reduction_RelWithDebInfo tests/apps/generalized_reduction/BT_reduction.c || cmake_object_order_depends_target_BT_reduction + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/generalized_reduction -Iparsec/include/fortran + OBJECT_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + OBJECT_FILE_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + TARGET_COMPILE_PDB = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/ + TARGET_PDB = tests/apps/generalized_reduction/BT_reduction.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target BT_reduction + + +############################################# +# Link the executable tests/apps/generalized_reduction/BT_reduction + +build tests/apps/generalized_reduction/BT_reduction: C_EXECUTABLE_LINKER__BT_reduction_RelWithDebInfo tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/main.c.o tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction_wrapper.c.o tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/reduc_data.c.o tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/BT_reduction.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/generalized_reduction/ptgpp_BT_reduction.BT_reduction parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/generalized_reduction/CMakeFiles/BT_reduction.dir/ + TARGET_FILE = 
tests/apps/generalized_reduction/BT_reduction + TARGET_PDB = tests/apps/generalized_reduction/BT_reduction.pdb + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/generalized_reduction/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/apps/generalized_reduction/rebuild_cache: phony tests/apps/generalized_reduction/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build tests/apps/generalized_reduction/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/generalized_reduction/edit_cache: phony tests/apps/generalized_reduction/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for package + +build tests/apps/generalized_reduction/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/generalized_reduction/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/apps/generalized_reduction/package: phony tests/apps/generalized_reduction/CMakeFiles/package.util + + +############################################# +# Utility command for ptgpp_BT_reduction.BT_reduction + +build tests/apps/generalized_reduction/ptgpp_BT_reduction.BT_reduction: phony tests/apps/generalized_reduction/CMakeFiles/ptgpp_BT_reduction.BT_reduction tests/apps/generalized_reduction/BT_reduction.h tests/apps/generalized_reduction/BT_reduction.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for test + +build tests/apps/generalized_reduction/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/generalized_reduction/test: phony tests/apps/generalized_reduction/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/apps/generalized_reduction/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build tests/apps/generalized_reduction/package_source: phony tests/apps/generalized_reduction/CMakeFiles/package_source.util + + +############################################# +# Utility command for install/strip + +build tests/apps/generalized_reduction/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/generalized_reduction/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/apps/generalized_reduction/install/strip: phony tests/apps/generalized_reduction/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/generalized_reduction/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/apps/generalized_reduction/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/generalized_reduction/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/apps/generalized_reduction/install: phony tests/apps/generalized_reduction/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/apps/generalized_reduction/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/generalized_reduction/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/apps/generalized_reduction/install/local: phony tests/apps/generalized_reduction/CMakeFiles/install/local.util + + +############################################# +# Custom command for tests/apps/generalized_reduction/BT_reduction.h + +build tests/apps/generalized_reduction/BT_reduction.h tests/apps/generalized_reduction/BT_reduction.c: CUSTOM_COMMAND ../tests/apps/generalized_reduction/BT_reduction.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/generalized_reduction/BT_reduction.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/generalized_reduction && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/generalized_reduction/BT_reduction.jdf -C BT_reduction.c -H BT_reduction.h -f BT_reduction + DESC = Generating BT_reduction.h, BT_reduction.c + restat = 1 + + +############################################# +# Phony custom command for tests/apps/generalized_reduction/CMakeFiles/ptgpp_BT_reduction.BT_reduction + +build tests/apps/generalized_reduction/CMakeFiles/ptgpp_BT_reduction.BT_reduction: phony tests/apps/generalized_reduction/BT_reduction.h tests/apps/generalized_reduction/BT_reduction.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + +# ============================================================================= +# Object build statements for EXECUTABLE target testing_stencil_1D + + +############################################# +# Order-only phony target for testing_stencil_1D + +build 
cmake_object_order_depends_target_testing_stencil_1D: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/stencil/ptgpp_testing_stencil_1D.stencil_1D tests/apps/stencil/stencil_1D.c tests/apps/stencil/stencil_1D.h + +build tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_internal.c.o: C_COMPILER__testing_stencil_1D_RelWithDebInfo ../tests/apps/stencil/stencil_internal.c || cmake_object_order_depends_target_testing_stencil_1D + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_internal.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/apps/stencil -Itests/apps/stencil -Iparsec/include/fortran + OBJECT_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + OBJECT_FILE_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + TARGET_COMPILE_PDB = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/ + TARGET_PDB = tests/apps/stencil/testing_stencil_1D.pdb + +build tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/testing_stencil_1D.c.o: C_COMPILER__testing_stencil_1D_RelWithDebInfo ../tests/apps/stencil/testing_stencil_1D.c || cmake_object_order_depends_target_testing_stencil_1D + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/testing_stencil_1D.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/apps/stencil -Itests/apps/stencil -Iparsec/include/fortran + OBJECT_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + OBJECT_FILE_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + TARGET_COMPILE_PDB = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/ + TARGET_PDB = tests/apps/stencil/testing_stencil_1D.pdb + +build tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_1D.c.o: C_COMPILER__testing_stencil_1D_RelWithDebInfo tests/apps/stencil/stencil_1D.c || cmake_object_order_depends_target_testing_stencil_1D + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_1D.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/apps/stencil -Itests/apps/stencil -Iparsec/include/fortran + OBJECT_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + OBJECT_FILE_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + TARGET_COMPILE_PDB = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/ + TARGET_PDB = tests/apps/stencil/testing_stencil_1D.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target testing_stencil_1D + + +############################################# +# Link the executable tests/apps/stencil/testing_stencil_1D + +build tests/apps/stencil/testing_stencil_1D: C_EXECUTABLE_LINKER__testing_stencil_1D_RelWithDebInfo tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_internal.c.o tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/testing_stencil_1D.c.o tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/stencil_1D.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/stencil/ptgpp_testing_stencil_1D.stencil_1D parsec/libparsec.so + FLAGS = 
-O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/stencil/CMakeFiles/testing_stencil_1D.dir/ + TARGET_FILE = tests/apps/stencil/testing_stencil_1D + TARGET_PDB = tests/apps/stencil/testing_stencil_1D.pdb + + +############################################# +# Utility command for edit_cache + +build tests/apps/stencil/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/stencil/edit_cache: phony tests/apps/stencil/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for ptgpp_testing_stencil_1D.stencil_1D + +build tests/apps/stencil/ptgpp_testing_stencil_1D.stencil_1D: phony tests/apps/stencil/CMakeFiles/ptgpp_testing_stencil_1D.stencil_1D tests/apps/stencil/stencil_1D.h tests/apps/stencil/stencil_1D.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/apps/stencil/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/stencil/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/apps/stencil/package: phony tests/apps/stencil/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/apps/stencil/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/stencil/test: phony tests/apps/stencil/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build tests/apps/stencil/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/apps/stencil/package_source: phony tests/apps/stencil/CMakeFiles/package_source.util + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/stencil/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/apps/stencil/rebuild_cache: phony tests/apps/stencil/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build tests/apps/stencil/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/stencil/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build tests/apps/stencil/install/strip: phony tests/apps/stencil/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/stencil/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/apps/stencil/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/stencil/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/apps/stencil/install: phony tests/apps/stencil/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/apps/stencil/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/stencil/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/apps/stencil/install/local: phony tests/apps/stencil/CMakeFiles/install/local.util + + +############################################# +# Custom command for tests/apps/stencil/stencil_1D.h + +build tests/apps/stencil/stencil_1D.h tests/apps/stencil/stencil_1D.c: CUSTOM_COMMAND ../tests/apps/stencil/stencil_1D.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/stencil/stencil_1D.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/stencil && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/stencil/stencil_1D.jdf -C stencil_1D.c -H stencil_1D.h -f stencil_1D + DESC = Generating stencil_1D.h, stencil_1D.c + restat = 1 + + +############################################# +# Phony custom command for tests/apps/stencil/CMakeFiles/ptgpp_testing_stencil_1D.stencil_1D + +build tests/apps/stencil/CMakeFiles/ptgpp_testing_stencil_1D.stencil_1D: phony tests/apps/stencil/stencil_1D.h tests/apps/stencil/stencil_1D.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/merge_sort/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build tests/apps/merge_sort/rebuild_cache: phony tests/apps/merge_sort/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for package + +build tests/apps/merge_sort/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/merge_sort/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build tests/apps/merge_sort/package: phony tests/apps/merge_sort/CMakeFiles/package.util + + +############################################# +# Utility command for ptgpp_merge_sort.merge_sort + +build tests/apps/merge_sort/ptgpp_merge_sort.merge_sort: phony tests/apps/merge_sort/CMakeFiles/ptgpp_merge_sort.merge_sort tests/apps/merge_sort/merge_sort.h tests/apps/merge_sort/merge_sort.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for test + +build tests/apps/merge_sort/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... 
+ pool = console + restat = 1 + +build tests/apps/merge_sort/test: phony tests/apps/merge_sort/CMakeFiles/test.util + +# ============================================================================= +# Object build statements for EXECUTABLE target merge_sort + + +############################################# +# Order-only phony target for merge_sort + +build cmake_object_order_depends_target_merge_sort: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/merge_sort/merge_sort.c tests/apps/merge_sort/merge_sort.h tests/apps/merge_sort/ptgpp_merge_sort.merge_sort + +build tests/apps/merge_sort/CMakeFiles/merge_sort.dir/main.c.o: C_COMPILER__merge_sort_RelWithDebInfo ../tests/apps/merge_sort/main.c || cmake_object_order_depends_target_merge_sort + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/merge_sort -Iparsec/include/fortran + OBJECT_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + OBJECT_FILE_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + TARGET_COMPILE_PDB = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/ + TARGET_PDB = tests/apps/merge_sort/merge_sort.pdb + +build tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort_wrapper.c.o: C_COMPILER__merge_sort_RelWithDebInfo ../tests/apps/merge_sort/merge_sort_wrapper.c || cmake_object_order_depends_target_merge_sort + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort_wrapper.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/merge_sort -Iparsec/include/fortran + OBJECT_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + OBJECT_FILE_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + TARGET_COMPILE_PDB = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/ + TARGET_PDB = tests/apps/merge_sort/merge_sort.pdb + +build tests/apps/merge_sort/CMakeFiles/merge_sort.dir/sort_data.c.o: C_COMPILER__merge_sort_RelWithDebInfo ../tests/apps/merge_sort/sort_data.c || cmake_object_order_depends_target_merge_sort + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/sort_data.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Itests/apps/merge_sort -Iparsec/include/fortran + OBJECT_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + OBJECT_FILE_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + TARGET_COMPILE_PDB = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/ + TARGET_PDB = tests/apps/merge_sort/merge_sort.pdb + +build tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort.c.o: C_COMPILER__merge_sort_RelWithDebInfo tests/apps/merge_sort/merge_sort.c || cmake_object_order_depends_target_merge_sort + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Itests/apps/merge_sort -Iparsec/include/fortran + OBJECT_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + OBJECT_FILE_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + TARGET_COMPILE_PDB = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/ + TARGET_PDB = tests/apps/merge_sort/merge_sort.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target merge_sort + + +############################################# +# Link the executable tests/apps/merge_sort/merge_sort + +build tests/apps/merge_sort/merge_sort: C_EXECUTABLE_LINKER__merge_sort_RelWithDebInfo tests/apps/merge_sort/CMakeFiles/merge_sort.dir/main.c.o tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort_wrapper.c.o tests/apps/merge_sort/CMakeFiles/merge_sort.dir/sort_data.c.o tests/apps/merge_sort/CMakeFiles/merge_sort.dir/merge_sort.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/merge_sort/ptgpp_merge_sort.merge_sort parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = tests/apps/merge_sort/CMakeFiles/merge_sort.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/merge_sort/CMakeFiles/merge_sort.dir/ + TARGET_FILE = tests/apps/merge_sort/merge_sort + TARGET_PDB = tests/apps/merge_sort/merge_sort.pdb + + +############################################# +# Utility command for package_source + +build 
tests/apps/merge_sort/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/apps/merge_sort/package_source: phony tests/apps/merge_sort/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build tests/apps/merge_sort/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/merge_sort/edit_cache: phony tests/apps/merge_sort/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/apps/merge_sort/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/merge_sort/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/apps/merge_sort/install/strip: phony tests/apps/merge_sort/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/merge_sort/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/apps/merge_sort/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/merge_sort/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/apps/merge_sort/install: phony tests/apps/merge_sort/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build tests/apps/merge_sort/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/merge_sort/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/apps/merge_sort/install/local: phony tests/apps/merge_sort/CMakeFiles/install/local.util + + +############################################# +# Phony custom command for tests/apps/merge_sort/CMakeFiles/ptgpp_merge_sort.merge_sort + +build tests/apps/merge_sort/CMakeFiles/ptgpp_merge_sort.merge_sort: phony tests/apps/merge_sort/merge_sort.h tests/apps/merge_sort/merge_sort.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for tests/apps/merge_sort/merge_sort.h + +build tests/apps/merge_sort/merge_sort.h tests/apps/merge_sort/merge_sort.c: CUSTOM_COMMAND ../tests/apps/merge_sort/merge_sort.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/merge_sort/merge_sort.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/merge_sort && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/merge_sort/merge_sort.jdf -C merge_sort.c -H merge_sort.h -f merge_sort + DESC = Generating merge_sort.h, merge_sort.c + restat = 1 + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/tests/apps/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build tests/apps/haar_tree/CMakeFiles/install/local.util: CUSTOM_COMMAND tests/apps/haar_tree/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build tests/apps/haar_tree/install/local: phony tests/apps/haar_tree/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target project + + +############################################# +# Order-only phony target for project + +build cmake_object_order_depends_target_project: phony || cmake_object_order_depends_target_parsec parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp tests/apps/haar_tree/project.c tests/apps/haar_tree/project.h tests/apps/haar_tree/ptgpp_project.project tests/apps/haar_tree/ptgpp_project.walk tests/apps/haar_tree/walk.c tests/apps/haar_tree/walk.h + +build tests/apps/haar_tree/CMakeFiles/project.dir/main.c.o: C_COMPILER__project_RelWithDebInfo ../tests/apps/haar_tree/main.c || cmake_object_order_depends_target_project + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/haar_tree/CMakeFiles/project.dir/main.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/apps/haar_tree -Itests/apps/haar_tree -Iparsec/include/fortran + OBJECT_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + OBJECT_FILE_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + TARGET_COMPILE_PDB = tests/apps/haar_tree/CMakeFiles/project.dir/ + TARGET_PDB = tests/apps/haar_tree/project.pdb + +build tests/apps/haar_tree/CMakeFiles/project.dir/tree_dist.c.o: C_COMPILER__project_RelWithDebInfo ../tests/apps/haar_tree/tree_dist.c || cmake_object_order_depends_target_project + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/haar_tree/CMakeFiles/project.dir/tree_dist.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/apps/haar_tree -Itests/apps/haar_tree -Iparsec/include/fortran + OBJECT_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + OBJECT_FILE_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + TARGET_COMPILE_PDB = tests/apps/haar_tree/CMakeFiles/project.dir/ + TARGET_PDB = tests/apps/haar_tree/project.pdb + +build tests/apps/haar_tree/CMakeFiles/project.dir/project.c.o: C_COMPILER__project_RelWithDebInfo tests/apps/haar_tree/project.c || cmake_object_order_depends_target_project + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/haar_tree/CMakeFiles/project.dir/project.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -I../tests/apps/haar_tree -Itests/apps/haar_tree -Iparsec/include/fortran + OBJECT_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + OBJECT_FILE_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + TARGET_COMPILE_PDB = tests/apps/haar_tree/CMakeFiles/project.dir/ + TARGET_PDB = tests/apps/haar_tree/project.pdb + +build tests/apps/haar_tree/CMakeFiles/project.dir/walk.c.o: C_COMPILER__project_RelWithDebInfo tests/apps/haar_tree/walk.c || cmake_object_order_depends_target_project + DEFINES = -D_GNU_SOURCE + DEP_FILE = tests/apps/haar_tree/CMakeFiles/project.dir/walk.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -I../tests/apps/haar_tree -Itests/apps/haar_tree -Iparsec/include/fortran + OBJECT_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + OBJECT_FILE_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + TARGET_COMPILE_PDB = tests/apps/haar_tree/CMakeFiles/project.dir/ + TARGET_PDB = tests/apps/haar_tree/project.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target project + + +############################################# +# Link the executable tests/apps/haar_tree/project + +build tests/apps/haar_tree/project: C_EXECUTABLE_LINKER__project_RelWithDebInfo tests/apps/haar_tree/CMakeFiles/project.dir/main.c.o tests/apps/haar_tree/CMakeFiles/project.dir/tree_dist.c.o tests/apps/haar_tree/CMakeFiles/project.dir/project.c.o tests/apps/haar_tree/CMakeFiles/project.dir/walk.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so tests/apps/haar_tree/ptgpp_project.project tests/apps/haar_tree/ptgpp_project.walk parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -lm -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lgcc_s -lgcc -lquadmath -lc -lgcc_s -lgcc -lquadmath -lc -lm -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic -pthread + OBJECT_DIR = tests/apps/haar_tree/CMakeFiles/project.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = tests/apps/haar_tree/CMakeFiles/project.dir/ + TARGET_FILE = tests/apps/haar_tree/project + TARGET_PDB = tests/apps/haar_tree/project.pdb + + +############################################# +# Utility command for package_source + +build 
tests/apps/haar_tree/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build tests/apps/haar_tree/package_source: phony tests/apps/haar_tree/CMakeFiles/package_source.util + + +############################################# +# Utility command for ptgpp_project.project + +build tests/apps/haar_tree/ptgpp_project.project: phony tests/apps/haar_tree/CMakeFiles/ptgpp_project.project tests/apps/haar_tree/project.h tests/apps/haar_tree/project.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for rebuild_cache + +build tests/apps/haar_tree/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build tests/apps/haar_tree/rebuild_cache: phony tests/apps/haar_tree/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for ptgpp_project.walk + +build tests/apps/haar_tree/ptgpp_project.walk: phony tests/apps/haar_tree/CMakeFiles/ptgpp_project.walk tests/apps/haar_tree/walk.h tests/apps/haar_tree/walk.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package + +build tests/apps/haar_tree/CMakeFiles/package.util: CUSTOM_COMMAND tests/apps/haar_tree/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build tests/apps/haar_tree/package: phony tests/apps/haar_tree/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build tests/apps/haar_tree/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build tests/apps/haar_tree/test: phony tests/apps/haar_tree/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build tests/apps/haar_tree/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build tests/apps/haar_tree/edit_cache: phony tests/apps/haar_tree/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install/strip + +build tests/apps/haar_tree/CMakeFiles/install/strip.util: CUSTOM_COMMAND tests/apps/haar_tree/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build tests/apps/haar_tree/install/strip: phony tests/apps/haar_tree/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build tests/apps/haar_tree/list_install_components: phony + + +############################################# +# Utility command for install + +build tests/apps/haar_tree/CMakeFiles/install.util: CUSTOM_COMMAND tests/apps/haar_tree/all + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build tests/apps/haar_tree/install: phony tests/apps/haar_tree/CMakeFiles/install.util + + +############################################# +# Custom command for tests/apps/haar_tree/project.h + +build tests/apps/haar_tree/project.h tests/apps/haar_tree/project.c: CUSTOM_COMMAND ../tests/apps/haar_tree/project.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/haar_tree/project.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/haar_tree/project.jdf -C project.c -H project.h -f project + DESC = Generating project.h, project.c + restat = 1 + + +############################################# +# Custom command for tests/apps/haar_tree/walk.h + +build tests/apps/haar_tree/walk.h tests/apps/haar_tree/walk.c: CUSTOM_COMMAND ../tests/apps/haar_tree/walk.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../tests/apps/haar_tree/walk.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/tests/apps/haar_tree && 
/home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/tests/apps/haar_tree/walk.jdf -C walk.c -H walk.h -f walk + DESC = Generating walk.h, walk.c + restat = 1 + + +############################################# +# Phony custom command for tests/apps/haar_tree/CMakeFiles/ptgpp_project.project + +build tests/apps/haar_tree/CMakeFiles/ptgpp_project.project: phony tests/apps/haar_tree/project.h tests/apps/haar_tree/project.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for tests/apps/haar_tree/CMakeFiles/ptgpp_project.walk + +build tests/apps/haar_tree/CMakeFiles/ptgpp_project.walk: phony tests/apps/haar_tree/walk.h tests/apps/haar_tree/walk.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build examples/CMakeFiles/install/strip.util: CUSTOM_COMMAND examples/all + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build examples/install/strip: phony examples/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build examples/CMakeFiles/install/local.util: CUSTOM_COMMAND examples/all + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build examples/install/local: phony examples/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target ex05 + + +############################################# +# Order-only phony target for ex05 + +build cmake_object_order_depends_target_ex05: phony || cmake_object_order_depends_target_parsec examples/Ex05_Broadcast.c examples/Ex05_Broadcast.h examples/ptgpp_ex05.Ex05_Broadcast parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex05.dir/Ex05_Broadcast.c.o: C_COMPILER__ex05_RelWithDebInfo examples/Ex05_Broadcast.c || cmake_object_order_depends_target_ex05 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex05.dir/Ex05_Broadcast.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex05.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex05.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex05.dir/ + TARGET_PDB = examples/ex05.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex05 + + +############################################# +# Link the executable examples/ex05 + +build examples/ex05: C_EXECUTABLE_LINKER__ex05_RelWithDebInfo examples/CMakeFiles/ex05.dir/Ex05_Broadcast.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex05.Ex05_Broadcast parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex05.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex05.dir/ + TARGET_FILE = examples/ex05 + TARGET_PDB = examples/ex05.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target ex03 + + +############################################# +# Order-only phony target for ex03 + +build cmake_object_order_depends_target_ex03: phony || cmake_object_order_depends_target_parsec examples/Ex03_ChainMPI.c examples/Ex03_ChainMPI.h examples/ptgpp_ex03.Ex03_ChainMPI parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex03.dir/Ex03_ChainMPI.c.o: C_COMPILER__ex03_RelWithDebInfo 
examples/Ex03_ChainMPI.c || cmake_object_order_depends_target_ex03 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex03.dir/Ex03_ChainMPI.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex03.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex03.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex03.dir/ + TARGET_PDB = examples/ex03.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex03 + + +############################################# +# Link the executable examples/ex03 + +build examples/ex03: C_EXECUTABLE_LINKER__ex03_RelWithDebInfo examples/CMakeFiles/ex03.dir/Ex03_ChainMPI.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex03.Ex03_ChainMPI parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex03.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex03.dir/ + TARGET_FILE = examples/ex03 + TARGET_PDB = examples/ex03.pdb + + +############################################# +# Utility command for rebuild_cache + +build examples/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/examples && 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build examples/rebuild_cache: phony examples/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for ptgpp_ex04.Ex04_ChainData + +build examples/ptgpp_ex04.Ex04_ChainData: phony examples/CMakeFiles/ptgpp_ex04.Ex04_ChainData examples/Ex04_ChainData.h examples/Ex04_ChainData.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_ex03.Ex03_ChainMPI + +build examples/ptgpp_ex03.Ex03_ChainMPI: phony examples/CMakeFiles/ptgpp_ex03.Ex03_ChainMPI examples/Ex03_ChainMPI.h examples/Ex03_ChainMPI.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target ex06 + + +############################################# +# Order-only phony target for ex06 + +build cmake_object_order_depends_target_ex06: phony || cmake_object_order_depends_target_parsec examples/Ex06_RAW.c examples/Ex06_RAW.h examples/ptgpp_ex06.Ex06_RAW parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex06.dir/Ex06_RAW.c.o: C_COMPILER__ex06_RelWithDebInfo examples/Ex06_RAW.c || cmake_object_order_depends_target_ex06 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex06.dir/Ex06_RAW.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex06.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex06.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex06.dir/ + TARGET_PDB = examples/ex06.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex06 + + +############################################# +# Link the executable examples/ex06 + +build examples/ex06: C_EXECUTABLE_LINKER__ex06_RelWithDebInfo examples/CMakeFiles/ex06.dir/Ex06_RAW.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex06.Ex06_RAW parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex06.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex06.dir/ + TARGET_FILE = examples/ex06 + TARGET_PDB = examples/ex06.pdb + +# ============================================================================= +# Object build statements for EXECUTABLE target ex02 + + +############################################# +# Order-only phony target for ex02 + +build cmake_object_order_depends_target_ex02: phony || cmake_object_order_depends_target_parsec examples/Ex02_Chain.c examples/Ex02_Chain.h examples/ptgpp_ex02.Ex02_Chain parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex02.dir/Ex02_Chain.c.o: C_COMPILER__ex02_RelWithDebInfo examples/Ex02_Chain.c || 
cmake_object_order_depends_target_ex02 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex02.dir/Ex02_Chain.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex02.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex02.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex02.dir/ + TARGET_PDB = examples/ex02.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex02 + + +############################################# +# Link the executable examples/ex02 + +build examples/ex02: C_EXECUTABLE_LINKER__ex02_RelWithDebInfo examples/CMakeFiles/ex02.dir/Ex02_Chain.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex02.Ex02_Chain parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex02.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex02.dir/ + TARGET_FILE = examples/ex02 + TARGET_PDB = examples/ex02.pdb + + +############################################# +# Utility command for ptgpp_ex06.Ex06_RAW + +build examples/ptgpp_ex06.Ex06_RAW: phony examples/CMakeFiles/ptgpp_ex06.Ex06_RAW examples/Ex06_RAW.h examples/Ex06_RAW.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + 
+############################################# +# Utility command for install + +build examples/CMakeFiles/install.util: CUSTOM_COMMAND examples/all + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build examples/install: phony examples/CMakeFiles/install.util + + +############################################# +# Utility command for ptgpp_ex02.Ex02_Chain + +build examples/ptgpp_ex02.Ex02_Chain: phony examples/CMakeFiles/ptgpp_ex02.Ex02_Chain examples/Ex02_Chain.h examples/Ex02_Chain.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target ex01 + + +############################################# +# Order-only phony target for ex01 + +build cmake_object_order_depends_target_ex01: phony || cmake_object_order_depends_target_parsec examples/Ex01_HelloWorld.c examples/Ex01_HelloWorld.h examples/ptgpp_ex01.Ex01_HelloWorld parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex01.dir/Ex01_HelloWorld.c.o: C_COMPILER__ex01_RelWithDebInfo examples/Ex01_HelloWorld.c || cmake_object_order_depends_target_ex01 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex01.dir/Ex01_HelloWorld.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex01.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex01.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex01.dir/ + TARGET_PDB = examples/ex01.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex01 + + +############################################# +# Link the executable examples/ex01 + +build examples/ex01: C_EXECUTABLE_LINKER__ex01_RelWithDebInfo examples/CMakeFiles/ex01.dir/Ex01_HelloWorld.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex01.Ex01_HelloWorld parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex01.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex01.dir/ + TARGET_FILE = examples/ex01 + TARGET_PDB = examples/ex01.pdb + + +############################################# +# Utility command for package + +build examples/CMakeFiles/package.util: CUSTOM_COMMAND examples/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build examples/package: phony examples/CMakeFiles/package.util + +# ============================================================================= +# Object build statements for EXECUTABLE target ex00 + + +############################################# +# Order-only phony target for ex00 + +build cmake_object_order_depends_target_ex00: phony || cmake_object_order_depends_target_parsec + +build examples/CMakeFiles/ex00.dir/Ex00_StartStop.c.o: C_COMPILER__ex00_RelWithDebInfo ../examples/Ex00_StartStop.c || cmake_object_order_depends_target_ex00 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex00.dir/Ex00_StartStop.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex00.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex00.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex00.dir/ + TARGET_PDB = examples/ex00.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex00 + + +############################################# +# Link the executable examples/ex00 + +build examples/ex00: C_EXECUTABLE_LINKER__ex00_RelWithDebInfo examples/CMakeFiles/ex00.dir/Ex00_StartStop.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = 
examples/CMakeFiles/ex00.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex00.dir/ + TARGET_FILE = examples/ex00 + TARGET_PDB = examples/ex00.pdb + + +############################################# +# Utility command for ptgpp_ex05.Ex05_Broadcast + +build examples/ptgpp_ex05.Ex05_Broadcast: phony examples/CMakeFiles/ptgpp_ex05.Ex05_Broadcast examples/Ex05_Broadcast.h examples/Ex05_Broadcast.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= +# Object build statements for EXECUTABLE target ex07 + + +############################################# +# Order-only phony target for ex07 + +build cmake_object_order_depends_target_ex07: phony || cmake_object_order_depends_target_parsec examples/Ex07_RAW_CTL.c examples/Ex07_RAW_CTL.h examples/ptgpp_ex07.Ex07_RAW_CTL parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex07.dir/Ex07_RAW_CTL.c.o: C_COMPILER__ex07_RelWithDebInfo examples/Ex07_RAW_CTL.c || cmake_object_order_depends_target_ex07 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex07.dir/Ex07_RAW_CTL.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex07.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex07.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex07.dir/ + TARGET_PDB = examples/ex07.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex07 + + +############################################# +# Link the executable examples/ex07 + +build examples/ex07: C_EXECUTABLE_LINKER__ex07_RelWithDebInfo examples/CMakeFiles/ex07.dir/Ex07_RAW_CTL.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex07.Ex07_RAW_CTL parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex07.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex07.dir/ + TARGET_FILE = examples/ex07 + TARGET_PDB = examples/ex07.pdb + + +############################################# +# Utility command for ptgpp_ex07.Ex07_RAW_CTL + +build examples/ptgpp_ex07.Ex07_RAW_CTL: phony examples/CMakeFiles/ptgpp_ex07.Ex07_RAW_CTL examples/Ex07_RAW_CTL.h examples/Ex07_RAW_CTL.c parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for ptgpp_ex01.Ex01_HelloWorld + +build examples/ptgpp_ex01.Ex01_HelloWorld: phony examples/CMakeFiles/ptgpp_ex01.Ex01_HelloWorld examples/Ex01_HelloWorld.h examples/Ex01_HelloWorld.c 
parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + + +############################################# +# Utility command for package_source + +build examples/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build examples/package_source: phony examples/CMakeFiles/package_source.util + + +############################################# +# Utility command for test + +build examples/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build examples/test: phony examples/CMakeFiles/test.util + + +############################################# +# Utility command for edit_cache + +build examples/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build examples/edit_cache: phony examples/CMakeFiles/edit_cache.util + +# ============================================================================= +# Object build statements for EXECUTABLE target ex04 + + +############################################# +# Order-only phony target for ex04 + +build cmake_object_order_depends_target_ex04: phony || cmake_object_order_depends_target_parsec examples/Ex04_ChainData.c examples/Ex04_ChainData.h examples/ptgpp_ex04.Ex04_ChainData parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build examples/CMakeFiles/ex04.dir/Ex04_ChainData.c.o: C_COMPILER__ex04_RelWithDebInfo examples/Ex04_ChainData.c || cmake_object_order_depends_target_ex04 + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/CMakeFiles/ex04.dir/Ex04_ChainData.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. -Iinclude -I../ -Iexamples -Iparsec/include/fortran + OBJECT_DIR = examples/CMakeFiles/ex04.dir + OBJECT_FILE_DIR = examples/CMakeFiles/ex04.dir + TARGET_COMPILE_PDB = examples/CMakeFiles/ex04.dir/ + TARGET_PDB = examples/ex04.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target ex04 + + +############################################# +# Link the executable examples/ex04 + +build examples/ex04: C_EXECUTABLE_LINKER__ex04_RelWithDebInfo examples/CMakeFiles/ex04.dir/Ex04_ChainData.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || examples/ptgpp_ex04.Ex04_ChainData parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib 
-L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/CMakeFiles/ex04.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/CMakeFiles/ex04.dir/ + TARGET_FILE = examples/ex04 + TARGET_PDB = examples/ex04.pdb + + +############################################# +# Utility command for list_install_components + +build examples/list_install_components: phony + + +############################################# +# Custom command for examples/Ex05_Broadcast.h + +build examples/Ex05_Broadcast.h examples/Ex05_Broadcast.c: CUSTOM_COMMAND ../examples/Ex05_Broadcast.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex05_Broadcast.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex05_Broadcast.jdf -C Ex05_Broadcast.c -H Ex05_Broadcast.h -f Ex05_Broadcast + DESC = Generating Ex05_Broadcast.h, Ex05_Broadcast.c + restat = 1 + + +############################################# +# Custom command for examples/Ex03_ChainMPI.h + +build examples/Ex03_ChainMPI.h examples/Ex03_ChainMPI.c: CUSTOM_COMMAND ../examples/Ex03_ChainMPI.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex03_ChainMPI.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex03_ChainMPI.jdf -C Ex03_ChainMPI.c -H Ex03_ChainMPI.h -f Ex03_ChainMPI + DESC = Generating Ex03_ChainMPI.h, Ex03_ChainMPI.c + restat = 1 + + +############################################# +# Phony custom command for 
examples/CMakeFiles/ptgpp_ex04.Ex04_ChainData + +build examples/CMakeFiles/ptgpp_ex04.Ex04_ChainData: phony examples/Ex04_ChainData.h examples/Ex04_ChainData.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for examples/Ex04_ChainData.h + +build examples/Ex04_ChainData.h examples/Ex04_ChainData.c: CUSTOM_COMMAND ../examples/Ex04_ChainData.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex04_ChainData.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex04_ChainData.jdf -C Ex04_ChainData.c -H Ex04_ChainData.h -f Ex04_ChainData + DESC = Generating Ex04_ChainData.h, Ex04_ChainData.c + restat = 1 + + +############################################# +# Phony custom command for examples/CMakeFiles/ptgpp_ex03.Ex03_ChainMPI + +build examples/CMakeFiles/ptgpp_ex03.Ex03_ChainMPI: phony examples/Ex03_ChainMPI.h examples/Ex03_ChainMPI.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for examples/Ex06_RAW.h + +build examples/Ex06_RAW.h examples/Ex06_RAW.c: CUSTOM_COMMAND ../examples/Ex06_RAW.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex06_RAW.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex06_RAW.jdf -C Ex06_RAW.c -H Ex06_RAW.h -f Ex06_RAW + DESC = Generating Ex06_RAW.h, Ex06_RAW.c + restat = 1 + + +############################################# +# Custom command for 
examples/Ex02_Chain.h + +build examples/Ex02_Chain.h examples/Ex02_Chain.c: CUSTOM_COMMAND ../examples/Ex02_Chain.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex02_Chain.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex02_Chain.jdf -C Ex02_Chain.c -H Ex02_Chain.h -f Ex02_Chain + DESC = Generating Ex02_Chain.h, Ex02_Chain.c + restat = 1 + + +############################################# +# Phony custom command for examples/CMakeFiles/ptgpp_ex06.Ex06_RAW + +build examples/CMakeFiles/ptgpp_ex06.Ex06_RAW: phony examples/Ex06_RAW.h examples/Ex06_RAW.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for examples/CMakeFiles/ptgpp_ex02.Ex02_Chain + +build examples/CMakeFiles/ptgpp_ex02.Ex02_Chain: phony examples/Ex02_Chain.h examples/Ex02_Chain.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for examples/Ex01_HelloWorld.h + +build examples/Ex01_HelloWorld.h examples/Ex01_HelloWorld.c: CUSTOM_COMMAND ../examples/Ex01_HelloWorld.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex01_HelloWorld.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex01_HelloWorld.jdf -C Ex01_HelloWorld.c -H Ex01_HelloWorld.h -f Ex01_HelloWorld + DESC = Generating Ex01_HelloWorld.h, Ex01_HelloWorld.c + restat = 1 + + +############################################# +# Phony custom command for 
examples/CMakeFiles/ptgpp_ex05.Ex05_Broadcast + +build examples/CMakeFiles/ptgpp_ex05.Ex05_Broadcast: phony examples/Ex05_Broadcast.h examples/Ex05_Broadcast.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Custom command for examples/Ex07_RAW_CTL.h + +build examples/Ex07_RAW_CTL.h examples/Ex07_RAW_CTL.c: CUSTOM_COMMAND ../examples/Ex07_RAW_CTL.jdf parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp ../examples/Ex07_RAW_CTL.jdf || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + COMMAND = cd /home/joseph/parsec/parsec/examples && /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp -E -i /home/joseph/parsec/examples/Ex07_RAW_CTL.jdf -C Ex07_RAW_CTL.c -H Ex07_RAW_CTL.h -f Ex07_RAW_CTL + DESC = Generating Ex07_RAW_CTL.h, Ex07_RAW_CTL.c + restat = 1 + + +############################################# +# Phony custom command for examples/CMakeFiles/ptgpp_ex07.Ex07_RAW_CTL + +build examples/CMakeFiles/ptgpp_ex07.Ex07_RAW_CTL: phony examples/Ex07_RAW_CTL.h examples/Ex07_RAW_CTL.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + + +############################################# +# Phony custom command for examples/CMakeFiles/ptgpp_ex01.Ex01_HelloWorld + +build examples/CMakeFiles/ptgpp_ex01.Ex01_HelloWorld: phony examples/Ex01_HelloWorld.h examples/Ex01_HelloWorld.c || parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp parsec/libparsec-base.a parsec/parsec-base-obj + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/examples/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build 
examples/interfaces/dtd/CMakeFiles/install/local.util: CUSTOM_COMMAND examples/interfaces/dtd/all + COMMAND = cd /home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build examples/interfaces/dtd/install/local: phony examples/interfaces/dtd/CMakeFiles/install/local.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_example_hello_arg + + +############################################# +# Order-only phony target for dtd_example_hello_arg + +build cmake_object_order_depends_target_dtd_example_hello_arg: phony || cmake_object_order_depends_target_parsec + +build examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir/dtd_example_hello_arg.c.o: C_COMPILER__dtd_example_hello_arg_RelWithDebInfo ../examples/interfaces/dtd/dtd_example_hello_arg.c || cmake_object_order_depends_target_dtd_example_hello_arg + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir/dtd_example_hello_arg.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir + OBJECT_FILE_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir/ + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_arg.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_example_hello_arg + + +############################################# +# Link the executable examples/interfaces/dtd/dtd_example_hello_arg + +build examples/interfaces/dtd/dtd_example_hello_arg: C_EXECUTABLE_LINKER__dtd_example_hello_arg_RelWithDebInfo examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir/dtd_example_hello_arg.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_arg.dir/ + TARGET_FILE = examples/interfaces/dtd/dtd_example_hello_arg + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_arg.pdb + + +############################################# +# Utility command for install/strip + +build examples/interfaces/dtd/CMakeFiles/install/strip.util: CUSTOM_COMMAND examples/interfaces/dtd/all + COMMAND = cd 
/home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build examples/interfaces/dtd/install/strip: phony examples/interfaces/dtd/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build examples/interfaces/dtd/list_install_components: phony + + +############################################# +# Utility command for install + +build examples/interfaces/dtd/CMakeFiles/install.util: CUSTOM_COMMAND examples/interfaces/dtd/all + COMMAND = cd /home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build examples/interfaces/dtd/install: phony examples/interfaces/dtd/CMakeFiles/install.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_example_hello_world_untied + + +############################################# +# Order-only phony target for dtd_example_hello_world_untied + +build cmake_object_order_depends_target_dtd_example_hello_world_untied: phony || cmake_object_order_depends_target_parsec + +build examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir/dtd_example_hello_world_untied.c.o: C_COMPILER__dtd_example_hello_world_untied_RelWithDebInfo ../examples/interfaces/dtd/dtd_example_hello_world_untied.c || cmake_object_order_depends_target_dtd_example_hello_world_untied + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir/dtd_example_hello_world_untied.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir + OBJECT_FILE_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir/ + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_world_untied.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_example_hello_world_untied + + +############################################# +# Link the executable examples/interfaces/dtd/dtd_example_hello_world_untied + +build examples/interfaces/dtd/dtd_example_hello_world_untied: C_EXECUTABLE_LINKER__dtd_example_hello_world_untied_RelWithDebInfo examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir/dtd_example_hello_world_untied.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world_untied.dir/ + TARGET_FILE = examples/interfaces/dtd/dtd_example_hello_world_untied + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_world_untied.pdb + + +############################################# +# Utility command for package + +build 
examples/interfaces/dtd/CMakeFiles/package.util: CUSTOM_COMMAND examples/interfaces/dtd/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... + pool = console + restat = 1 + +build examples/interfaces/dtd/package: phony examples/interfaces/dtd/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build examples/interfaces/dtd/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build examples/interfaces/dtd/test: phony examples/interfaces/dtd/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build examples/interfaces/dtd/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... 
+ pool = console + restat = 1 + +build examples/interfaces/dtd/package_source: phony examples/interfaces/dtd/CMakeFiles/package_source.util + +# ============================================================================= +# Object build statements for EXECUTABLE target dtd_example_hello_world + + +############################################# +# Order-only phony target for dtd_example_hello_world + +build cmake_object_order_depends_target_dtd_example_hello_world: phony || cmake_object_order_depends_target_parsec + +build examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir/dtd_example_hello_world.c.o: C_COMPILER__dtd_example_hello_world_RelWithDebInfo ../examples/interfaces/dtd/dtd_example_hello_world.c || cmake_object_order_depends_target_dtd_example_hello_world + DEFINES = -D_GNU_SOURCE + DEP_FILE = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir/dtd_example_hello_world.c.o.d + FLAGS = -O2 -g -DNDEBUG -m64 -Og -Wall -Wextra -fdiagnostics-color -pthread -std=gnu11 + INCLUDES = -Iparsec/include -I. 
-Iinclude -I../ -Iparsec/include/fortran + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir + OBJECT_FILE_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir/ + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_world.pdb + + +# ============================================================================= +# Link build statements for EXECUTABLE target dtd_example_hello_world + + +############################################# +# Link the executable examples/interfaces/dtd/dtd_example_hello_world + +build examples/interfaces/dtd/dtd_example_hello_world: C_EXECUTABLE_LINKER__dtd_example_hello_world_RelWithDebInfo examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir/dtd_example_hello_world.c.o | parsec/libparsec.so.4.0.0 /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so || parsec/libparsec.so parsec/libparsec.so + FLAGS = -O2 -g -DNDEBUG + LINK_FLAGS = -Wl,-rpath -Wl,/usr/local/lib -Wl,--enable-new-dtags -pthread + LINK_LIBRARIES = -Wl,-rpath,/home/joseph/parsec/parsec/parsec:/usr/local/lib parsec/libparsec.so.4.0.0 -ldl -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lc -lm -lgcc_s -lgcc -lquadmath -lc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic + OBJECT_DIR = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = examples/interfaces/dtd/CMakeFiles/dtd_example_hello_world.dir/ + TARGET_FILE = examples/interfaces/dtd/dtd_example_hello_world + TARGET_PDB = examples/interfaces/dtd/dtd_example_hello_world.pdb + + +############################################# +# Utility command for edit_cache + +build examples/interfaces/dtd/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd 
/home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build examples/interfaces/dtd/edit_cache: phony examples/interfaces/dtd/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build examples/interfaces/dtd/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/examples/interfaces/dtd && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build examples/interfaces/dtd/rebuild_cache: phony examples/interfaces/dtd/CMakeFiles/rebuild_cache.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/joseph/parsec/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for docs + +build docs/docs: phony docs/CMakeFiles/docs docs/docs-parsec + + +############################################# +# Utility command for package + +build docs/CMakeFiles/package.util: CUSTOM_COMMAND docs/all + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackConfig.cmake + DESC = Run CPack packaging tool... 
+ pool = console + restat = 1 + +build docs/package: phony docs/CMakeFiles/package.util + + +############################################# +# Utility command for test + +build docs/CMakeFiles/test.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/ctest --force-new-ctest-process + DESC = Running tests... + pool = console + restat = 1 + +build docs/test: phony docs/CMakeFiles/test.util + + +############################################# +# Utility command for package_source + +build docs/CMakeFiles/package_source.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cpack --config ./CPackSourceConfig.cmake /home/joseph/parsec/parsec/CPackSourceConfig.cmake + DESC = Run CPack packaging tool for source... + pool = console + restat = 1 + +build docs/package_source: phony docs/CMakeFiles/package_source.util + + +############################################# +# Utility command for edit_cache + +build docs/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build docs/edit_cache: phony docs/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build docs/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake --regenerate-during-build -S/home/joseph/parsec -B/home/joseph/parsec/parsec + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build docs/rebuild_cache: phony docs/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for install/strip + +build docs/CMakeFiles/install/strip.util: CUSTOM_COMMAND docs/all + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build docs/install/strip: phony docs/CMakeFiles/install/strip.util + + +############################################# +# Utility command for list_install_components + +build docs/list_install_components: phony + + +############################################# +# Utility command for docs-parsec + +build docs/docs-parsec: phony docs/CMakeFiles/docs-parsec + + +############################################# +# Utility command for install + +build docs/CMakeFiles/install.util: CUSTOM_COMMAND docs/all + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build docs/install: phony docs/CMakeFiles/install.util + + +############################################# +# Utility command for install/local + +build docs/CMakeFiles/install/local.util: CUSTOM_COMMAND docs/all + COMMAND = cd /home/joseph/parsec/parsec/docs && /home/joseph/.local/lib/python3.8/site-packages/cmake/data/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... 
+ pool = console + restat = 1 + +build docs/install/local: phony docs/CMakeFiles/install/local.util + + +############################################# +# Phony custom command for docs/CMakeFiles/docs + +build docs/CMakeFiles/docs: phony || docs/docs-parsec + + +############################################# +# Custom command for docs/CMakeFiles/docs-parsec + +build docs/CMakeFiles/docs-parsec: CUSTOM_COMMAND + COMMAND = cd /home/joseph/parsec/parsec/docs && true && true + DESC = No documentation will be generated. Configure with BUILD_DOCUMENTATION set to ON + +# ============================================================================= +# Target aliases. + +build BT_reduction: phony tests/apps/generalized_reduction/BT_reduction + +build a2a: phony tests/apps/all2all/a2a + +build atomics: phony tests/class/atomics + +build atomics_inline: phony tests/class/atomics_inline + +build avoidable_reshape: phony tests/collections/reshape/avoidable_reshape + +build branching: phony tests/dsl/ptg/branching/branching + +build branching_ht: phony tests/dsl/ptg/branching/branching_ht + +build branching_idxarr: phony tests/dsl/ptg/branching/branching_idxarr + +build bw_test: phony tests/apps/pingpong/bw_test + +build choice: phony tests/dsl/ptg/choice/choice + +build complex_deps: phony tests/dsl/ptg/complex_deps + +build compose: phony tests/api/compose + +build ctlgat: phony tests/dsl/ptg/controlgather/ctlgat + +build docs: phony docs/docs + +build docs-parsec: phony docs/docs-parsec + +build dtd_example_hello_arg: phony examples/interfaces/dtd/dtd_example_hello_arg + +build dtd_example_hello_world: phony examples/interfaces/dtd/dtd_example_hello_world + +build dtd_example_hello_world_untied: phony examples/interfaces/dtd/dtd_example_hello_world_untied + +build dtd_test_allreduce: phony tests/dsl/dtd/dtd_test_allreduce + +build dtd_test_broadcast: phony tests/dsl/dtd/dtd_test_broadcast + +build dtd_test_data_flush: phony tests/dsl/dtd/dtd_test_data_flush + +build 
dtd_test_explicit_task_creation: phony tests/dsl/dtd/dtd_test_explicit_task_creation + +build dtd_test_flag_dont_track: phony tests/dsl/dtd/dtd_test_flag_dont_track + +build dtd_test_global_id_for_dc_assumed: phony tests/dsl/dtd/dtd_test_global_id_for_dc_assumed + +build dtd_test_hierarchy: phony tests/dsl/dtd/dtd_test_hierarchy + +build dtd_test_insert_task_interface: phony tests/dsl/dtd/dtd_test_insert_task_interface + +build dtd_test_interleave_actions: phony tests/dsl/dtd/dtd_test_interleave_actions + +build dtd_test_multiple_handle_wait: phony tests/dsl/dtd/dtd_test_multiple_handle_wait + +build dtd_test_null_as_tile: phony tests/dsl/dtd/dtd_test_null_as_tile + +build dtd_test_pingpong: phony tests/dsl/dtd/dtd_test_pingpong + +build dtd_test_reduce: phony tests/dsl/dtd/dtd_test_reduce + +build dtd_test_task_generation: phony tests/dsl/dtd/dtd_test_task_generation + +build dtd_test_task_inserting_task: phony tests/dsl/dtd/dtd_test_task_inserting_task + +build dtd_test_task_insertion: phony tests/dsl/dtd/dtd_test_task_insertion + +build dtd_test_task_placement: phony tests/dsl/dtd/dtd_test_task_placement + +build dtd_test_template_counter: phony tests/dsl/dtd/dtd_test_template_counter + +build dtd_test_tp_enqueue_dequeue: phony tests/dsl/dtd/dtd_test_tp_enqueue_dequeue + +build dtd_test_untie: phony tests/dsl/dtd/dtd_test_untie + +build dtd_test_war: phony tests/dsl/dtd/dtd_test_war + +build dtt_bug_replicator: phony tests/runtime/dtt_bug_replicator + +build ex00: phony examples/ex00 + +build ex01: phony examples/ex01 + +build ex02: phony examples/ex02 + +build ex03: phony examples/ex03 + +build ex04: phony examples/ex04 + +build ex05: phony examples/ex05 + +build ex06: phony examples/ex06 + +build ex07: phony examples/ex07 + +build future: phony tests/class/future + +build future_datacopy: phony tests/class/future_datacopy + +build hash: phony tests/class/hash + +build hash_inline: phony tests/class/hash_inline + +build input_dep_reshape_single_copy: phony 
tests/collections/reshape/input_dep_reshape_single_copy + +build jdf_forward_READ_NULL: phony tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL + +build jdf_forward_RW_NULL: phony tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL + +build kcyclic: phony tests/collections/kcyclic + +build libparsec-base.a: phony parsec/libparsec-base.a + +build libparsec.so: phony parsec/libparsec.so + +build lifo: phony tests/class/lifo + +build lifo_inline: phony tests/class/lifo_inline + +build list: phony tests/class/list + +build list_inline: phony tests/class/list_inline + +build local_indices: phony tests/dsl/ptg/local-indices/local_indices + +build merge_sort: phony tests/apps/merge_sort/merge_sort + +build multichain: phony tests/runtime/multichain + +build must_fail_too_many_in_deps: phony tests/dsl/ptg/ptgpp/must_fail_too_many_in_deps + +build must_fail_too_many_local_vars: phony tests/dsl/ptg/ptgpp/must_fail_too_many_local_vars + +build must_fail_too_many_out_deps: phony tests/dsl/ptg/ptgpp/must_fail_too_many_out_deps + +build must_fail_too_many_read_flows: phony tests/dsl/ptg/ptgpp/must_fail_too_many_read_flows + +build must_fail_too_many_write_flows: phony tests/dsl/ptg/ptgpp/must_fail_too_many_write_flows + +build operator: phony tests/api/operator + +build parsec: phony parsec/libparsec.so + +build parsec-base: phony parsec/libparsec-base.a + +build parsec-base-obj: phony parsec/parsec-base-obj + +build parsec-ptgpp: phony parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +build parsec-reader: phony tools/aggregator_visu/parsec-reader + +build parsec_build_tests: phony tests/parsec_build_tests + +build parsec_pregen_flex_utils: phony parsec/parsec_pregen_flex_utils + +build parsec_pregen_ptg: phony parsec/interfaces/ptg/ptg-compiler/parsec_pregen_ptg + +build project: phony tests/apps/haar_tree/project + +build ptgpp_BT_reduction.BT_reduction: phony tests/apps/generalized_reduction/ptgpp_BT_reduction.BT_reduction + +build ptgpp_a2a.a2a: phony tests/apps/all2all/ptgpp_a2a.a2a + +build 
ptgpp_avoidable_reshape.avoidable_reshape: phony tests/collections/reshape/ptgpp_avoidable_reshape.avoidable_reshape + +build ptgpp_branching.branching: phony tests/dsl/ptg/branching/ptgpp_branching.branching + +build ptgpp_branching_ht.branching_ht: phony tests/dsl/ptg/branching/ptgpp_branching_ht.branching_ht + +build ptgpp_branching_idxarr.branching_idxarr: phony tests/dsl/ptg/branching/ptgpp_branching_idxarr.branching_idxarr + +build ptgpp_bw_test.bandwidth: phony tests/apps/pingpong/ptgpp_bw_test.bandwidth + +build ptgpp_choice.choice: phony tests/dsl/ptg/choice/ptgpp_choice.choice + +build ptgpp_complex_deps.complex_deps: phony tests/dsl/ptg/ptgpp_complex_deps.complex_deps + +build ptgpp_ctlgat.ctlgat: phony tests/dsl/ptg/controlgather/ptgpp_ctlgat.ctlgat + +build ptgpp_dtt_bug_replicator.dtt_bug_replicator: phony tests/runtime/ptgpp_dtt_bug_replicator.dtt_bug_replicator + +build ptgpp_ex01.Ex01_HelloWorld: phony examples/ptgpp_ex01.Ex01_HelloWorld + +build ptgpp_ex02.Ex02_Chain: phony examples/ptgpp_ex02.Ex02_Chain + +build ptgpp_ex03.Ex03_ChainMPI: phony examples/ptgpp_ex03.Ex03_ChainMPI + +build ptgpp_ex04.Ex04_ChainData: phony examples/ptgpp_ex04.Ex04_ChainData + +build ptgpp_ex05.Ex05_Broadcast: phony examples/ptgpp_ex05.Ex05_Broadcast + +build ptgpp_ex06.Ex06_RAW: phony examples/ptgpp_ex06.Ex06_RAW + +build ptgpp_ex07.Ex07_RAW_CTL: phony examples/ptgpp_ex07.Ex07_RAW_CTL + +build ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape: phony tests/collections/reshape/ptgpp_input_dep_reshape_single_copy.input_dep_single_copy_reshape + +build ptgpp_jdf_forward_READ_NULL.forward_READ_NULL: phony tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_READ_NULL.forward_READ_NULL + +build ptgpp_jdf_forward_RW_NULL.forward_RW_NULL: phony tests/dsl/ptg/ptgpp/ptgpp_jdf_forward_RW_NULL.forward_RW_NULL + +build ptgpp_kcyclic.kcyclic: phony tests/collections/ptgpp_kcyclic.kcyclic + +build ptgpp_local_indices.local_indices: phony 
tests/dsl/ptg/local-indices/ptgpp_local_indices.local_indices + +build ptgpp_merge_sort.merge_sort: phony tests/apps/merge_sort/ptgpp_merge_sort.merge_sort + +build ptgpp_multichain.multichain: phony tests/runtime/ptgpp_multichain.multichain + +build ptgpp_must_fail_too_many_in_deps.too_many_in_deps: phony tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_in_deps.too_many_in_deps + +build ptgpp_must_fail_too_many_local_vars.too_many_local_vars: phony tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_local_vars.too_many_local_vars + +build ptgpp_must_fail_too_many_out_deps.too_many_out_deps: phony tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_out_deps.too_many_out_deps + +build ptgpp_must_fail_too_many_read_flows.too_many_read_flows: phony tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_read_flows.too_many_read_flows + +build ptgpp_must_fail_too_many_write_flows.too_many_write_flows: phony tests/dsl/ptg/ptgpp/ptgpp_must_fail_too_many_write_flows.too_many_write_flows + +build ptgpp_parsec.apply: phony parsec/data_dist/matrix/ptgpp_parsec.apply + +build ptgpp_parsec.diag_band_to_rect: phony parsec/data_dist/matrix/ptgpp_parsec.diag_band_to_rect + +build ptgpp_parsec.redistribute: phony parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute + +build ptgpp_parsec.redistribute_reshuffle: phony parsec/data_dist/matrix/redistribute/ptgpp_parsec.redistribute_reshuffle + +build ptgpp_parsec.reduce: phony parsec/data_dist/matrix/ptgpp_parsec.reduce + +build ptgpp_parsec.reduce_col: phony parsec/data_dist/matrix/ptgpp_parsec.reduce_col + +build ptgpp_parsec.reduce_row: phony parsec/data_dist/matrix/ptgpp_parsec.reduce_row + +build ptgpp_project.project: phony tests/apps/haar_tree/ptgpp_project.project + +build ptgpp_project.walk: phony tests/apps/haar_tree/ptgpp_project.walk + +build ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow: phony tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow + +build 
ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps: phony tests/collections/reshape/ptgpp_remote_multiple_outs_same_pred_flow.remote_multiple_outs_same_pred_flow_multiple_deps + +build ptgpp_reshape.local_input_LU_LL: phony tests/collections/reshape/ptgpp_reshape.local_input_LU_LL + +build ptgpp_reshape.local_input_reshape: phony tests/collections/reshape/ptgpp_reshape.local_input_reshape + +build ptgpp_reshape.local_no_reshape: phony tests/collections/reshape/ptgpp_reshape.local_no_reshape + +build ptgpp_reshape.local_output_reshape: phony tests/collections/reshape/ptgpp_reshape.local_output_reshape + +build ptgpp_reshape.local_read_reshape: phony tests/collections/reshape/ptgpp_reshape.local_read_reshape + +build ptgpp_reshape.remote_no_re_reshape: phony tests/collections/reshape/ptgpp_reshape.remote_no_re_reshape + +build ptgpp_reshape.remote_read_reshape: phony tests/collections/reshape/ptgpp_reshape.remote_read_reshape + +build ptgpp_rtt.rtt: phony tests/apps/pingpong/ptgpp_rtt.rtt + +build ptgpp_schedmicro.ep: phony tests/runtime/scheduling/ptgpp_schedmicro.ep + +build ptgpp_startup.startup: phony tests/dsl/ptg/ptgpp_startup.startup + +build ptgpp_strange.strange: phony tests/dsl/ptg/ptgpp_strange.strange + +build ptgpp_testing_band.two_dim_band: phony tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band + +build ptgpp_testing_band.two_dim_band_free: phony tests/collections/two_dim_band/ptgpp_testing_band.two_dim_band_free + +build ptgpp_testing_redistribute.redistribute_bound: phony tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_bound + +build ptgpp_testing_redistribute.redistribute_check: phony tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check + +build ptgpp_testing_redistribute.redistribute_check2: phony tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_check2 + +build ptgpp_testing_redistribute.redistribute_no_optimization: phony 
tests/collections/redistribute/ptgpp_testing_redistribute.redistribute_no_optimization + +build ptgpp_testing_redistribute_random.redistribute_bound: phony tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_bound + +build ptgpp_testing_redistribute_random.redistribute_check: phony tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check + +build ptgpp_testing_redistribute_random.redistribute_check2: phony tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_check2 + +build ptgpp_testing_redistribute_random.redistribute_no_optimization: phony tests/collections/redistribute/ptgpp_testing_redistribute_random.redistribute_no_optimization + +build ptgpp_testing_stencil_1D.stencil_1D: phony tests/apps/stencil/ptgpp_testing_stencil_1D.stencil_1D + +build ptgpp_touch_ex.touch: phony tests/api/ptgpp_touch_ex.touch + +build ptgpp_touch_ex_inline.touch: phony tests/api/ptgpp_touch_ex_inline.touch + +build ptgpp_touch_exf.touch: phony tests/api/ptgpp_touch_exf.touch + +build ptgpp_udf.udf: phony tests/dsl/ptg/user-defined-functions/ptgpp_udf.udf + +build ptgpp_write_check.write_check: phony tests/dsl/ptg/ptgpp/ptgpp_write_check.write_check + +build reduce: phony tests/collections/reduce + +build remote_multiple_outs_same_pred_flow: phony tests/collections/reshape/remote_multiple_outs_same_pred_flow + +build reshape: phony tests/collections/reshape/reshape + +build rtt: phony tests/apps/pingpong/rtt + +build rwlock: phony tests/class/rwlock + +build rwlock_inline: phony tests/class/rwlock_inline + +build schedmicro: phony tests/runtime/scheduling/schedmicro + +build startup: phony tests/dsl/ptg/startup + +build strange: phony tests/dsl/ptg/strange + +build testing_band: phony tests/collections/two_dim_band/testing_band + +build testing_redistribute: phony tests/collections/redistribute/testing_redistribute + +build testing_redistribute_random: phony 
tests/collections/redistribute/testing_redistribute_random + +build testing_stencil_1D: phony tests/apps/stencil/testing_stencil_1D + +build tests_common: phony tests/tests_common + +build touch_ex: phony tests/api/touch_ex + +build touch_ex_inline: phony tests/api/touch_ex_inline + +build touch_exf: phony tests/api/touch_exf + +build udf: phony tests/dsl/ptg/user-defined-functions/udf + +build write_check: phony tests/dsl/ptg/ptgpp/write_check + +# ============================================================================= +# Folder targets. + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec + +build all: phony build_with_parsec tools/all parsec/all tests/all examples/all docs/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/docs + +build docs/all: phony + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/examples + +build examples/all: phony examples/ex05 examples/ex03 examples/ex06 examples/ex02 examples/ex01 examples/ex00 examples/ex07 examples/ex04 examples/interfaces/dtd/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/examples/interfaces/dtd + +build examples/interfaces/dtd/all: phony examples/interfaces/dtd/dtd_example_hello_arg examples/interfaces/dtd/dtd_example_hello_world_untied examples/interfaces/dtd/dtd_example_hello_world + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec + +build parsec/all: phony parsec/libparsec.so parsec/libparsec-base.a 
parsec/parsec-base-obj parsec/interfaces/ptg/all parsec/data_dist/all parsec/fortran/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/data_dist + +build parsec/data_dist/all: phony parsec/data_dist/matrix/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/data_dist/matrix + +build parsec/data_dist/matrix/all: phony parsec/data_dist/matrix/redistribute/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/data_dist/matrix/redistribute + +build parsec/data_dist/matrix/redistribute/all: phony + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/fortran + +build parsec/fortran/all: phony + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/interfaces/ptg + +build parsec/interfaces/ptg/all: phony parsec/interfaces/ptg/ptg-compiler/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/parsec/interfaces/ptg/ptg-compiler + +build parsec/interfaces/ptg/ptg-compiler/all: phony parsec/interfaces/ptg/ptg-compiler/parsec-ptgpp + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests + +build tests/all: phony tests/tests_common tests/class/all tests/api/all tests/runtime/all tests/dsl/ptg/all tests/collections/all 
tests/dsl/dtd/all tests/apps/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/api + +build tests/api/all: phony tests/api/touch_ex tests/api/touch_ex_inline tests/api/operator tests/api/touch_exf tests/api/compose + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps + +build tests/apps/all: phony tests/apps/pingpong/all tests/apps/all2all/all tests/apps/generalized_reduction/all tests/apps/stencil/all tests/apps/merge_sort/all tests/apps/haar_tree/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/all2all + +build tests/apps/all2all/all: phony tests/apps/all2all/a2a + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/generalized_reduction + +build tests/apps/generalized_reduction/all: phony tests/apps/generalized_reduction/BT_reduction + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/haar_tree + +build tests/apps/haar_tree/all: phony tests/apps/haar_tree/project + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/merge_sort + +build tests/apps/merge_sort/all: phony tests/apps/merge_sort/merge_sort + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/pingpong + +build 
tests/apps/pingpong/all: phony tests/apps/pingpong/bw_test tests/apps/pingpong/rtt + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/apps/stencil + +build tests/apps/stencil/all: phony tests/apps/stencil/testing_stencil_1D + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/class + +build tests/class/all: phony tests/class/hash_inline tests/class/list_inline tests/class/atomics tests/class/future tests/class/lifo_inline tests/class/future_datacopy tests/class/lifo tests/class/rwlock_inline tests/class/rwlock tests/class/hash tests/class/list tests/class/atomics_inline + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/collections + +build tests/collections/all: phony tests/collections/reduce tests/collections/kcyclic tests/collections/two_dim_band/all tests/collections/redistribute/all tests/collections/reshape/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/collections/redistribute + +build tests/collections/redistribute/all: phony tests/collections/redistribute/testing_redistribute tests/collections/redistribute/testing_redistribute_random + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/collections/reshape + +build tests/collections/reshape/all: phony tests/collections/reshape/avoidable_reshape tests/collections/reshape/reshape tests/collections/reshape/remote_multiple_outs_same_pred_flow 
tests/collections/reshape/input_dep_reshape_single_copy + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/collections/two_dim_band + +build tests/collections/two_dim_band/all: phony tests/collections/two_dim_band/testing_band + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/dtd + +build tests/dsl/dtd/all: phony tests/dsl/dtd/dtd_test_flag_dont_track tests/dsl/dtd/dtd_test_multiple_handle_wait tests/dsl/dtd/dtd_test_task_generation tests/dsl/dtd/dtd_test_task_insertion tests/dsl/dtd/dtd_test_global_id_for_dc_assumed tests/dsl/dtd/dtd_test_war tests/dsl/dtd/dtd_test_hierarchy tests/dsl/dtd/dtd_test_null_as_tile tests/dsl/dtd/dtd_test_pingpong tests/dsl/dtd/dtd_test_allreduce tests/dsl/dtd/dtd_test_insert_task_interface tests/dsl/dtd/dtd_test_template_counter tests/dsl/dtd/dtd_test_task_inserting_task tests/dsl/dtd/dtd_test_untie tests/dsl/dtd/dtd_test_task_placement tests/dsl/dtd/dtd_test_data_flush tests/dsl/dtd/dtd_test_broadcast tests/dsl/dtd/dtd_test_explicit_task_creation tests/dsl/dtd/dtd_test_reduce tests/dsl/dtd/dtd_test_tp_enqueue_dequeue tests/dsl/dtd/dtd_test_interleave_actions + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg + +build tests/dsl/ptg/all: phony tests/dsl/ptg/strange tests/dsl/ptg/startup tests/dsl/ptg/complex_deps tests/dsl/ptg/ptgpp/all tests/dsl/ptg/branching/all tests/dsl/ptg/choice/all tests/dsl/ptg/controlgather/all tests/dsl/ptg/user-defined-functions/all tests/dsl/ptg/cuda/all tests/dsl/ptg/local-indices/all + +# ============================================================================= + +############################################# +# Folder: 
/home/joseph/parsec/parsec/tests/dsl/ptg/branching + +build tests/dsl/ptg/branching/all: phony tests/dsl/ptg/branching/branching tests/dsl/ptg/branching/branching_ht tests/dsl/ptg/branching/branching_idxarr + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/choice + +build tests/dsl/ptg/choice/all: phony tests/dsl/ptg/choice/choice + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/controlgather + +build tests/dsl/ptg/controlgather/all: phony tests/dsl/ptg/controlgather/ctlgat + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/cuda + +build tests/dsl/ptg/cuda/all: phony + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/local-indices + +build tests/dsl/ptg/local-indices/all: phony tests/dsl/ptg/local-indices/local_indices + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/ptgpp + +build tests/dsl/ptg/ptgpp/all: phony tests/dsl/ptg/ptgpp/jdf_forward_READ_NULL tests/dsl/ptg/ptgpp/jdf_forward_RW_NULL tests/dsl/ptg/ptgpp/write_check + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/dsl/ptg/user-defined-functions + +build tests/dsl/ptg/user-defined-functions/all: phony tests/dsl/ptg/user-defined-functions/udf + +# ============================================================================= + 
+############################################# +# Folder: /home/joseph/parsec/parsec/tests/runtime + +build tests/runtime/all: phony tests/runtime/multichain tests/runtime/dtt_bug_replicator tests/runtime/scheduling/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tests/runtime/scheduling + +build tests/runtime/scheduling/all: phony tests/runtime/scheduling/schedmicro + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tools + +build tools/all: phony tools/profiling/all tools/aggregator_visu/all + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tools/aggregator_visu + +build tools/aggregator_visu/all: phony tools/aggregator_visu/parsec-reader + +# ============================================================================= + +############################################# +# Folder: /home/joseph/parsec/parsec/tools/profiling + +build tools/profiling/all: phony + +# ============================================================================= +# Built-in targets + + +############################################# +# Re-run CMake if any of its inputs changed. 
+ +build build.ninja: RERUN_CMAKE | ../CMakeLists.txt ../CTestConfig.cmake ../cmake_modules/AddDocumentedFiles.cmake ../cmake_modules/CheckAtomicIntrinsic.cmake ../cmake_modules/CheckStructureFieldOffset.c.in ../cmake_modules/CheckStructureFieldOffset.cmake ../cmake_modules/FindHWLOC.cmake ../cmake_modules/FindPAPI.cmake ../cmake_modules/PaRSECConfig.cmake.in ../cmake_modules/ParsecCompilePTG.cmake ../cmake_modules/ParsecCompilerFlags.cmake ../contrib/build_with_parsec/CMakeLists.txt.in ../contrib/build_with_parsec/Makefile.in ../docs/CMakeLists.txt ../examples/CMakeLists.txt ../examples/interfaces/dtd/CMakeLists.txt ../tests/CMakeLists.txt ../tests/Testings.cmake ../tests/api/CMakeLists.txt ../tests/api/Testings.cmake ../tests/apps/CMakeLists.txt ../tests/apps/Testings.cmake ../tests/apps/all2all/CMakeLists.txt ../tests/apps/generalized_reduction/CMakeLists.txt ../tests/apps/haar_tree/CMakeLists.txt ../tests/apps/haar_tree/Testings.cmake ../tests/apps/merge_sort/CMakeLists.txt ../tests/apps/merge_sort/Testings.cmake ../tests/apps/pingpong/CMakeLists.txt ../tests/apps/stencil/CMakeLists.txt ../tests/class/CMakeLists.txt ../tests/class/Testings.cmake ../tests/collections/CMakeLists.txt ../tests/collections/Testings.cmake ../tests/collections/redistribute/CMakeLists.txt ../tests/collections/reshape/CMakeLists.txt ../tests/collections/two_dim_band/CMakeLists.txt ../tests/dsl/dtd/CMakeLists.txt ../tests/dsl/dtd/Testings.cmake ../tests/dsl/ptg/CMakeLists.txt ../tests/dsl/ptg/Testings.cmake ../tests/dsl/ptg/branching/CMakeLists.txt ../tests/dsl/ptg/branching/Testings.cmake ../tests/dsl/ptg/choice/CMakeLists.txt ../tests/dsl/ptg/controlgather/CMakeLists.txt ../tests/dsl/ptg/cuda/CMakeLists.txt ../tests/dsl/ptg/local-indices/CMakeLists.txt ../tests/dsl/ptg/ptgpp/CMakeLists.txt ../tests/dsl/ptg/ptgpp/Testings.cmake ../tests/dsl/ptg/user-defined-functions/CMakeLists.txt ../tests/dsl/ptg/user-defined-functions/Testings.cmake ../tests/profiling/Testings.cmake 
../tests/runtime/CMakeLists.txt ../tests/runtime/Testings.cmake ../tests/runtime/scheduling/CMakeLists.txt ../tests/runtime/scheduling/Testings.cmake ../tools/CMakeLists.txt ../tools/aggregator_visu/CMakeLists.txt ../tools/profiling/CMakeLists.txt /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/BasicConfigVersion-SameMajorVersion.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCCompilerABI.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXCompilerABI.cpp /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCommonLanguageInclude.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCompilerIdDetection.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeConfigurableFile.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDependentOption.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCXXCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompileFeatures.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompilerABI.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompilerId.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineFortranCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineSystem.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFindBinUtils.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranCompilerABI.F /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeGenericSystem.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeInitializeConfigs.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeLanguageInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeNinjaFindMake.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakePackageConfigHelpers.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseImplicitIncludeInfo.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseImplicitLinkInfo.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseLibraryArchitecture.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakePushCheckState.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystem.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystemSpecificInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystemSpecificInitialize.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCXXCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCompilerCommon.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestFortranCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CPack.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CPackComponent.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTest.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTestTargets.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTestUseLaunchers.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCSourceRuns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCXXCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCXXSourceCompiles.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckForPthreads.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFortranCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFortranSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFunctionExists.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFunctionExists.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFile.c.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFile.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFileCXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckLanguage.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckLibraryExists.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckStructHasMember.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckSymbolExists.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckTypeSize.c.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckTypeSize.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ADSP-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ARMCC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ARMClang-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/AppleClang-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Borland-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Bruce-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/CMakeCommonCompilerMacros.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Clang-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Clang-DetermineCompilerInternal.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Compaq-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Cray-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Embarcadero-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Fujitsu-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GHS-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-C.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-CXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-FindBinUtils.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-Fortran.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/HP-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/HP-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IAR-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Intel-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IntelLLVM-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/MSVC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/NVHPC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/NVIDIA-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/PGI-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/PathScale-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SCO-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SDCC-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SunPro-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/TI-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Watcom-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XL-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XL-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XLClang-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/zOS-C-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/DartConfiguration.tcl.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindBISON.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindCUDAToolkit.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindFLEX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindLibXml2.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/mpiver.f90.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/test_mpi.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/test_mpi.f90.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPackageHandleStandardArgs.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPackageMessage.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPkgConfig.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPython.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPython/Support.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindThreads.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Detect.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Input.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Macro.h.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Output.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/GNUInstallDirs.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/InstallRequiredSystemLibraries.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckSourceRuns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/FeatureTesting.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-Determine-CXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-C.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-CXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-Fortran.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/UnixPaths.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/WriteBasicConfigVersionFile.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Templates/CPackConfig.cmake.in CMakeCache.txt CMakeFiles/3.20.2/CMakeCCompiler.cmake CMakeFiles/3.20.2/CMakeCXXCompiler.cmake CMakeFiles/3.20.2/CMakeFortranCompiler.cmake CMakeFiles/3.20.2/CMakeSystem.cmake CMakeFiles/CheckCXX/result.cmake CMakeFiles/CheckFortran/result.cmake CMakeFiles/CheckIncludeFiles/PARSEC_ATOMIC_USE_C11_ATOMICS.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_COMPLEX_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_CTYPE_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_DLFCN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_ERRNO_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_EXECINFO_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_GEN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_GETOPT_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_LIMITS_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDARG_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDBOOL_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDDEF_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STRING_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYSLOG_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_MMAN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_PARAM_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_TYPES_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_UNISTD_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_VALGRIND_API.c CMakeFiles/CheckStructureFieldOffset/PARSEC_LIFO_HEAD_OFFSET.c CMakeFiles/CheckTypeSize/INT128.c CMakeFiles/CheckTypeSize/PARSEC_LIFO_OPAQUE_SIZEOF.c CMakeFiles/FindMPI/mpiver.f90 CMakeFiles/FindMPI/test_mpi.cpp CMakeFiles/FindMPI/test_mpi.f90 CMakeFiles/FortranCInterface/Output.cmake CMakeFiles/FortranCInterface/exe-Release.cmake CMakeLists.txt class/lifo-external.h.in data_dist/CMakeLists.txt data_dist/matrix/CMakeLists.txt data_dist/matrix/redistribute/CMakeLists.txt fortran/CMakeLists.txt include/parsec.pc.in include/parsec/parsec_config.h.in include/parsec/parsec_options.h.in interfaces/CMakeLists.txt 
interfaces/dtd/CMakeLists.txt interfaces/ptg/CMakeLists.txt interfaces/ptg/ptg-compiler/CMakeLists.txt mca/CMakeLists.txt mca/device/CMakeLists.txt mca/device/cuda/ValidateModule.CMake mca/mca_static_components.h.in mca/pins/CMakeLists.txt mca/pins/alperf/ValidateModule.CMake mca/pins/iterators_checker/ValidateModule.CMake mca/pins/papi/ValidateModule.CMake mca/pins/print_steals/ValidateModule.CMake mca/pins/ptg_to_dtd/ValidateModule.CMake mca/pins/task_granularity/ValidateModule.CMake mca/pins/task_profiler/ValidateModule.CMake mca/sched/lhq/ValidateModule.CMake + pool = console + + +############################################# +# A missing CMake input file is not an error. + +build ../CMakeLists.txt ../CTestConfig.cmake ../cmake_modules/AddDocumentedFiles.cmake ../cmake_modules/CheckAtomicIntrinsic.cmake ../cmake_modules/CheckStructureFieldOffset.c.in ../cmake_modules/CheckStructureFieldOffset.cmake ../cmake_modules/FindHWLOC.cmake ../cmake_modules/FindPAPI.cmake ../cmake_modules/PaRSECConfig.cmake.in ../cmake_modules/ParsecCompilePTG.cmake ../cmake_modules/ParsecCompilerFlags.cmake ../contrib/build_with_parsec/CMakeLists.txt.in ../contrib/build_with_parsec/Makefile.in ../docs/CMakeLists.txt ../examples/CMakeLists.txt ../examples/interfaces/dtd/CMakeLists.txt ../tests/CMakeLists.txt ../tests/Testings.cmake ../tests/api/CMakeLists.txt ../tests/api/Testings.cmake ../tests/apps/CMakeLists.txt ../tests/apps/Testings.cmake ../tests/apps/all2all/CMakeLists.txt ../tests/apps/generalized_reduction/CMakeLists.txt ../tests/apps/haar_tree/CMakeLists.txt ../tests/apps/haar_tree/Testings.cmake ../tests/apps/merge_sort/CMakeLists.txt ../tests/apps/merge_sort/Testings.cmake ../tests/apps/pingpong/CMakeLists.txt ../tests/apps/stencil/CMakeLists.txt ../tests/class/CMakeLists.txt ../tests/class/Testings.cmake ../tests/collections/CMakeLists.txt ../tests/collections/Testings.cmake ../tests/collections/redistribute/CMakeLists.txt ../tests/collections/reshape/CMakeLists.txt 
../tests/collections/two_dim_band/CMakeLists.txt ../tests/dsl/dtd/CMakeLists.txt ../tests/dsl/dtd/Testings.cmake ../tests/dsl/ptg/CMakeLists.txt ../tests/dsl/ptg/Testings.cmake ../tests/dsl/ptg/branching/CMakeLists.txt ../tests/dsl/ptg/branching/Testings.cmake ../tests/dsl/ptg/choice/CMakeLists.txt ../tests/dsl/ptg/controlgather/CMakeLists.txt ../tests/dsl/ptg/cuda/CMakeLists.txt ../tests/dsl/ptg/local-indices/CMakeLists.txt ../tests/dsl/ptg/ptgpp/CMakeLists.txt ../tests/dsl/ptg/ptgpp/Testings.cmake ../tests/dsl/ptg/user-defined-functions/CMakeLists.txt ../tests/dsl/ptg/user-defined-functions/Testings.cmake ../tests/profiling/Testings.cmake ../tests/runtime/CMakeLists.txt ../tests/runtime/Testings.cmake ../tests/runtime/scheduling/CMakeLists.txt ../tests/runtime/scheduling/Testings.cmake ../tools/CMakeLists.txt ../tools/aggregator_visu/CMakeLists.txt ../tools/profiling/CMakeLists.txt /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/BasicConfigVersion-SameMajorVersion.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCCompilerABI.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXCompilerABI.cpp /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCXXInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCommonLanguageInclude.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeCompilerIdDetection.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeConfigurableFile.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDependentOption.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCXXCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompileFeatures.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompilerABI.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineCompilerId.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineFortranCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeDetermineSystem.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFindBinUtils.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranCompiler.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranCompilerABI.F /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeFortranInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeGenericSystem.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeInitializeConfigs.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeLanguageInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeNinjaFindMake.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakePackageConfigHelpers.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseImplicitIncludeInfo.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseImplicitLinkInfo.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeParseLibraryArchitecture.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakePushCheckState.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystem.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystemSpecificInformation.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeSystemSpecificInitialize.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCXXCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestCompilerCommon.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CMakeTestFortranCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CPack.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CPackComponent.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTest.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTestTargets.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CTestUseLaunchers.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCSourceRuns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCXXCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckCXXSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckForPthreads.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFortranCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFortranSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFunctionExists.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckFunctionExists.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFile.c.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFile.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFileCXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckIncludeFiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckLanguage.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckLibraryExists.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckStructHasMember.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckSymbolExists.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckTypeSize.c.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/CheckTypeSize.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ADSP-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ARMCC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/ARMClang-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/AppleClang-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Borland-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Bruce-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/CMakeCommonCompilerMacros.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Clang-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Clang-DetermineCompilerInternal.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Compaq-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Cray-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Embarcadero-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Fujitsu-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GHS-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-C.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-CXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-FindBinUtils.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU-Fortran.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/GNU.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/HP-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/HP-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IAR-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Intel-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/IntelLLVM-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/MSVC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/NVHPC-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/NVIDIA-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/PGI-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/PathScale-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SCO-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SDCC-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SunPro-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/TI-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/Watcom-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XL-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XL-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XLClang-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/zOS-C-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/DartConfiguration.tcl.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindBISON.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindCUDAToolkit.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindFLEX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindLibXml2.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/mpiver.f90.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/test_mpi.c /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindMPI/test_mpi.f90.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPackageHandleStandardArgs.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPackageMessage.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPkgConfig.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPython.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindPython/Support.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FindThreads.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Detect.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Input.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Macro.h.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/FortranCInterface/Output.cmake.in /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/GNUInstallDirs.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/InstallRequiredSystemLibraries.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckCompilerFlag.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckSourceCompiles.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/CheckSourceRuns.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Internal/FeatureTesting.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-Determine-CXX.cmake 
/home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-C.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-CXX.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU-Fortran.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux-GNU.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/Linux.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/Platform/UnixPaths.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Modules/WriteBasicConfigVersionFile.cmake /home/joseph/.local/lib/python3.8/site-packages/cmake/data/share/cmake-3.20/Templates/CPackConfig.cmake.in CMakeCache.txt CMakeFiles/3.20.2/CMakeCCompiler.cmake CMakeFiles/3.20.2/CMakeCXXCompiler.cmake CMakeFiles/3.20.2/CMakeFortranCompiler.cmake CMakeFiles/3.20.2/CMakeSystem.cmake CMakeFiles/CheckCXX/result.cmake CMakeFiles/CheckFortran/result.cmake CMakeFiles/CheckIncludeFiles/PARSEC_ATOMIC_USE_C11_ATOMICS.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_COMPLEX_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_CTYPE_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_DLFCN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_ERRNO_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_EXECINFO_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_GEN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_GETOPT_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_LIMITS_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDARG_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDBOOL_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STDDEF_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_STRING_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYSLOG_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_MMAN_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_PARAM_H.c 
CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_SYS_TYPES_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_UNISTD_H.c CMakeFiles/CheckIncludeFiles/PARSEC_HAVE_VALGRIND_API.c CMakeFiles/CheckStructureFieldOffset/PARSEC_LIFO_HEAD_OFFSET.c CMakeFiles/CheckTypeSize/INT128.c CMakeFiles/CheckTypeSize/PARSEC_LIFO_OPAQUE_SIZEOF.c CMakeFiles/FindMPI/mpiver.f90 CMakeFiles/FindMPI/test_mpi.cpp CMakeFiles/FindMPI/test_mpi.f90 CMakeFiles/FortranCInterface/Output.cmake CMakeFiles/FortranCInterface/exe-Release.cmake CMakeLists.txt class/lifo-external.h.in data_dist/CMakeLists.txt data_dist/matrix/CMakeLists.txt data_dist/matrix/redistribute/CMakeLists.txt fortran/CMakeLists.txt include/parsec.pc.in include/parsec/parsec_config.h.in include/parsec/parsec_options.h.in interfaces/CMakeLists.txt interfaces/dtd/CMakeLists.txt interfaces/ptg/CMakeLists.txt interfaces/ptg/ptg-compiler/CMakeLists.txt mca/CMakeLists.txt mca/device/CMakeLists.txt mca/device/cuda/ValidateModule.CMake mca/mca_static_components.h.in mca/pins/CMakeLists.txt mca/pins/alperf/ValidateModule.CMake mca/pins/iterators_checker/ValidateModule.CMake mca/pins/papi/ValidateModule.CMake mca/pins/print_steals/ValidateModule.CMake mca/pins/ptg_to_dtd/ValidateModule.CMake mca/pins/task_granularity/ValidateModule.CMake mca/pins/task_profiler/ValidateModule.CMake mca/sched/lhq/ValidateModule.CMake: phony + + +############################################# +# Clean all the built files. + +build clean: CLEAN + + +############################################# +# Print all primary targets available. + +build help: HELP + + +############################################# +# Make the all target the default. 
+ +default all diff --git a/parsec/contrib/build_with_parsec/CMakeLists.txt b/parsec/contrib/build_with_parsec/CMakeLists.txt new file mode 100644 index 000000000..b8932473e --- /dev/null +++ b/parsec/contrib/build_with_parsec/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required (VERSION 3.18) +project (parsec-test-external C) + +# CMake Policies Tuning +if(POLICY CMP0074) + # CMP0074: Starting with CMake 3.12, all FIND_ use _ROOT in the search path + # in addition to the specified paths + cmake_policy(SET CMP0074 NEW) +endif() +if(POLICY CMP0104) + # CMP0104: Set default values for CMAKE_CUDA_ARCHITECTURES + cmake_policy(SET CMP0104 OLD) +endif() + +set(PaRSEC_ROOT "/home/joseph/parsec/build/install" CACHE PATH "Location of the PaRSEC installation") +find_package(PaRSEC REQUIRED) +if(NOT TARGET PaRSEC::parsec AND NOT TARGET PaRSEC::parsec_ptgpp) + message(FATAL_ERROR "User requested PaRSEC with PaRSEC_ROOT=${PaRSEC_ROOT} not found") +endif() +if(PARSEC_HAVE_CUDA) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 35-virtual) + endif() + enable_language(CUDA) + message(STATUS "Cuda architectures: ${CMAKE_CUDA_ARCHITECTURES}") +endif() + +# Test if parsec can compile/link using the DTD interface +add_executable(dtd_test_allreduce dtd_test_allreduce.c) +target_link_libraries(dtd_test_allreduce PRIVATE PaRSEC::parsec) + +# Test if parsec can compile/link using the PTG interface +include(ParsecCompilePTG) +add_executable(write_check) +target_ptg_sources(write_check PRIVATE write_check.jdf) +if(PARSEC_HAVE_CUDA) + target_sources(write_check PRIVATE write_check.cu) +endif() +target_link_libraries(write_check PRIVATE PaRSEC::parsec) + + diff --git a/parsec/contrib/build_with_parsec/Makefile b/parsec/contrib/build_with_parsec/Makefile new file mode 100644 index 000000000..4559a85a7 --- /dev/null +++ b/parsec/contrib/build_with_parsec/Makefile @@ -0,0 +1,32 @@ +PARSECDIR=/home/joseph/parsec/build/install 
+PARSEC_PKGCONFIG=/home/joseph/parsec/build/install/lib/pkgconfig + +PKG_CONFIG_PATH += ":${PARSEC_PKGCONFIG}" + +CC = gcc + +CFLAGS = $(shell PKG_CONFIG_PATH="${PKG_CONFIG_PATH}" pkg-config --cflags parsec) +LDFLAGS = $(shell PKG_CONFIG_PATH="${PKG_CONFIG_PATH}" pkg-config --libs parsec) + +PTGPP = ${PARSECDIR}/bin/parsec-ptgpp +PTGFLAGS = + +TESTS = write_check dtd_test_allreduce + +all:${TESTS} + +%.c %.h: %.jdf + ${PTGPP} -E -i $< -o `basename $@ .c` ${PTGFLAGS} + +%.o: %.c + ${CC} ${CFLAGS} -o $@ -c $< + +write_check: write_check.o + ${CC} -o $@ ${LDFLAGS} $< + +dtd_test_allreduce: dtd_test_allreduce.o + ${CC} -o $@ ${LDFLAGS} $^ + +clean: + rm -f *.o ${TESTS} write_check.c write_check.h + diff --git a/parsec/executed_tasks b/parsec/executed_tasks new file mode 100644 index 000000000..56c12f323 --- /dev/null +++ b/parsec/executed_tasks @@ -0,0 +1,409 @@ +interfaces/dtd/insert_function.c:333: if( !(tp->super.devices_index_mask & (1 << device->device_index))) +interfaces/dtd/insert_function.c:335: tp->super.devices_index_mask &= ~(1 << device->device_index); +interfaces/dtd/insert_function.c:336: if((NULL == device) || (NULL == device->taskpool_unregister)) +interfaces/dtd/insert_function.c:338: (void)device->taskpool_unregister(device, &tp->super); +interfaces/dtd/insert_function.c:1258: __tp->super.devices_index_mask |= (1 << device->device_index); +interfaces/dtd/insert_function.c:1356: if( !(tp->devices_index_mask & (1 << device->device_index))) continue; /* not supported */ +interfaces/dtd/insert_function.c:1360: if( PARSEC_DEV_CUDA == device->type ) continue; +interfaces/dtd/insert_function.c:1362: if( NULL != device->taskpool_register ) +interfaces/dtd/insert_function.c:1364: device->taskpool_register(device, (parsec_taskpool_t *)tp)) { +interfaces/dtd/insert_function.c:1365: tp->devices_index_mask &= ~(1 << device->device_index); /* can't use this type */ +interfaces/ptg/ptg-compiler/jdf2c.c:4372: " if(NULL != device->taskpool_register)\n" 
+interfaces/ptg/ptg-compiler/jdf2c.c:4373: " if( PARSEC_SUCCESS != device->taskpool_register(device, (parsec_taskpool_t*)__parsec_tp) ) {\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4374: " parsec_debug_verbose(5, parsec_debug_output, \"Device %%s refused to register taskpool %%p\", device->name, __parsec_tp);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4375: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4378: " if(NULL != device->memory_register) { /* Register all the data */\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4381: " supported_dev |= device->type;\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4391: " device->name, parsec_dc->key_base, parsec_dc, __parsec_tp);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4392: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4398: " device->name, parsec_dc->key_base, parsec_dc, __parsec_tp);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4399: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4523: " if((NULL == (device = parsec_mca_device_get(_i))) || (NULL == device->memory_unregister)) continue;\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4537: " if((NULL == device) || (NULL == device->taskpool_unregister)) continue;\n" +interfaces/ptg/ptg-compiler/jdf2c.c:4538: " if( PARSEC_SUCCESS != device->taskpool_unregister(device, &__parsec_tp->super.super) ) continue;\n" +interfaces/ptg/ptg-compiler/jdf2c.c:6567: " struct parsec_body_cuda_%s_%s_s parsec_body = { cuda_device->cuda_index, cuda_stream->cuda_stream, NULL };\n" +interfaces/ptg/ptg-compiler/jdf2c.c:6619: " PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, \"GPU[%%s]:\\tEnqueue on device %%s priority %%d\", gpu_device->super.name, \n" +data_dist/matrix/two_dim_rectangle_cyclic.c:45: return device->memory_register(device, desc, +data_dist/matrix/two_dim_rectangle_cyclic.c:57: return 
device->memory_unregister(device, desc, twodbc->mat); +data_dist/matrix/sym_two_dim_rectangle_cyclic.c:48: return device->memory_register(device, desc, +data_dist/matrix/sym_two_dim_rectangle_cyclic.c:60: return device->memory_unregister(device, desc, sym_twodbc->mat); +data_internal.h:75: void *device_private; /**< The pointer to the device-specific data. +mca/device/cuda/device_cuda_migrate.c:258: dealer_device_index = dealer_device->super.device_index; +mca/device/template/device_template_module.c:75: assert(tp->devices_index_mask & (1 << device->device_index)); +mca/device/template/device_template_module.c:81: if( chores[j].type != device->type ) +mca/device/template/device_template_module.c:100: tp->devices_index_mask &= ~(1 << device->device_index); /* drop support for this device */ +mca/device/template/device_template_module.c:102: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); +mca/device/template/device_template_module.c:155: device->super.name = strdup("0"); +mca/device/template/device_template_module.c:157: device->super.type = PARSEC_DEV_TEMPLATE; +mca/device/template/device_template_module.c:158: device->super.executed_tasks = 0; +mca/device/template/device_template_module.c:159: device->super.transferred_data_in = 0; +mca/device/template/device_template_module.c:160: device->super.transferred_data_out = 0; +mca/device/template/device_template_module.c:161: device->super.required_data_in = 0; +mca/device/template/device_template_module.c:162: device->super.required_data_out = 0; +mca/device/template/device_template_module.c:164: device->super.attach = (parsec_device_attach_f)parsec_device_template_attach; +mca/device/template/device_template_module.c:165: device->super.detach = (parsec_device_detach_f)parsec_device_template_detach; +mca/device/template/device_template_module.c:166: device->super.memory_register = parsec_template_memory_register; +mca/device/template/device_template_module.c:167: 
device->super.memory_unregister = parsec_template_memory_unregister; +mca/device/template/device_template_module.c:168: device->super.taskpool_register = parsec_template_taskpool_register; +mca/device/template/device_template_module.c:169: device->super.taskpool_unregister = parsec_template_taskpool_unregister; +mca/device/template/device_template_module.c:171: device->super.device_hweight = 0; /* no computational capacity */ +mca/device/template/device_template_module.c:172: device->super.device_tweight = 0; +mca/device/template/device_template_module.c:173: device->super.device_sweight = 0; +mca/device/template/device_template_module.c:174: device->super.device_dweight = 0; +mca/device/template/device_template_module.c:177: parsec_inform("TEMPLATE Device %d enabled\n", device->super.device_index); +mca/device/device_gpu.c:68: if( NULL != PARSEC_DATA_GET_COPY(original, gpu_device->super.device_index) ) { +mca/device/device_gpu.c:121: parsec_list_t *sort_list = gpu_device->exec_stream[0]->fifo_pending; +mca/device/device_gpu.c:127: if (gpu_device->sort_starting_p == NULL || !parsec_list_nolock_contains(sort_list, gpu_device->sort_starting_p) ) { +mca/device/device_gpu.c:128: gpu_device->sort_starting_p = (parsec_list_item_t*)sort_list->ghost_element.list_next; +mca/device/device_gpu.c:132: parsec_list_item_t *p = gpu_device->sort_starting_p; +mca/device/device_gpu.c:183: gpu_stream->workspace->workspace[i] = zone_malloc( gpu_device->memory, size); +mca/device/device_gpu.c:186: gpu_device->super.name, +mca/device/device_gpu.c:190: (gpu_device->exec_stream[0]->prof_event_track_enable || +mca/device/device_gpu.c:191: gpu_device->exec_stream[1]->prof_event_track_enable)) { +mca/device/device_gpu.c:194: gpu_stream->workspace->workspace[i], gpu_device->super.device_index, +mca/device/device_gpu.c:222: for( i = 0; i < gpu_device->max_exec_streams; i++ ) { +mca/device/device_gpu.c:223: parsec_gpu_exec_stream_t *gpu_stream = gpu_device->exec_stream[i]; 
+mca/device/device_gpu.c:228: (gpu_device->exec_stream[0]->prof_event_track_enable || +mca/device/device_gpu.c:229: gpu_device->exec_stream[1]->prof_event_track_enable)) { +mca/device/device_gpu.c:232: gpu_stream->workspace->workspace[i], gpu_device->super.device_index, +mca/device/device_gpu.c:238: gpu_device->super.name, +mca/device/device_gpu.c:240: zone_free( gpu_device->memory, gpu_stream->workspace->workspace[j] ); +mca/device/device_gpu.c:311: parsec_output(parsec_gpu_output_stream, "Device %d:%d (%p) epoch\n", gpu_device->super.device_index, +mca/device/device_gpu.c:312: gpu_device->super.device_index, gpu_device, gpu_device->data_avail_epoch); +mca/device/device_gpu.c:314: gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->max_exec_streams); +mca/device/device_gpu.c:316: (unsigned long long)gpu_device->super.transferred_data_in, (unsigned long long)gpu_device->super.d2d_transfer, +mca/device/device_gpu.c:317: (unsigned long long)gpu_device->super.transferred_data_out, +mca/device/device_gpu.c:318: (unsigned long long)gpu_device->super.required_data_in, (unsigned long long)gpu_device->super.required_data_out); +mca/device/device_gpu.c:319: for( i = 0; i < gpu_device->max_exec_streams; i++ ) { +mca/device/device_gpu.c:320: dump_exec_stream(gpu_device->exec_stream[i]); +mca/device/device_gpu.c:322: if( !parsec_list_is_empty(&gpu_device->gpu_mem_lru) ) { +mca/device/device_gpu.c:325: PARSEC_LIST_ITERATOR(&gpu_device->gpu_mem_lru, item, +mca/device/device_gpu.c:334: if( !parsec_list_is_empty(&gpu_device->gpu_mem_owned_lru) ) { +mca/device/device_gpu.c:337: PARSEC_LIST_ITERATOR(&gpu_device->gpu_mem_owned_lru, item, +mca/device/device.c:312: assert( i == device->device_index ); +mca/device/device.c:314: device_counter[device->device_index] += device->executed_tasks; +mca/device/device.c:315: transferred_in[device->device_index] += device->transferred_data_in; +mca/device/device.c:316: 
transferred_out[device->device_index] += device->transferred_data_out; +mca/device/device.c:317: required_in[device->device_index] += device->required_data_in; +mca/device/device.c:318: required_out[device->device_index] += device->required_data_out; +mca/device/device.c:320: total += device->executed_tasks; +mca/device/device.c:321: total_data_in += device->transferred_data_in; +mca/device/device.c:322: total_data_out += device->transferred_data_out; +mca/device/device.c:323: total_required_in += device->required_data_in; +mca/device/device.c:324: total_required_out += device->required_data_out; +mca/device/device.c:326: device->executed_tasks = 0; +mca/device/device.c:327: device->transferred_data_in = 0; +mca/device/device.c:328: device->transferred_data_out = 0; +mca/device/device.c:329: device->required_data_in = 0; +mca/device/device.c:330: device->required_data_out = 0; +mca/device/device.c:352: device->device_index, device_counter[i], (device_counter[i]/gtotal)*100.00, +mca/device/device.c:356: (((double)transferred_out[i]) / (double)required_out[i]) * 100.0, device->name ); +mca/device/device.c:557: parsec_device_hweight[i] = device->device_hweight; +mca/device/device.c:558: parsec_device_sweight[i] = device->device_sweight; +mca/device/device.c:559: parsec_device_dweight[i] = device->device_dweight; +mca/device/device.c:560: parsec_device_tweight[i] = device->device_tweight; +mca/device/device.c:561: if( PARSEC_DEV_RECURSIVE == device->type ) continue; +mca/device/device.c:562: total_hperf += device->device_hweight; +mca/device/device.c:563: total_tperf += device->device_tweight; +mca/device/device.c:564: total_sperf += device->device_sweight; +mca/device/device.c:565: total_dperf += device->device_dweight; +mca/device/device.c:710: device->device_hweight = nstreams * fp_ipc * freq; /* No processor have half precision for now */ +mca/device/device.c:711: device->device_tweight = nstreams * fp_ipc * freq; /* No processor support tensor operations for now 
*/ +mca/device/device.c:712: device->device_sweight = nstreams * fp_ipc * freq; +mca/device/device.c:713: device->device_dweight = nstreams * dp_ipc * freq; +mca/device/device.c:728: assert(tp->devices_index_mask & (1 << device->device_index)); +mca/device/device.c:736: if( chores[j].type != device->type ) +mca/device/device.c:754: tp->devices_index_mask &= ~(1 << device->device_index); /* discard this type */ +mca/device/device.c:756: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); +mca/device/device.c:820: if( NULL != device->context ) { +mca/device/device.c:833: device->device_index = parsec_nb_devices; +mca/device/device.c:835: device->context = context; +mca/device/device.c:837: PARSEC_OBJ_CONSTRUCT(&device->infos, parsec_info_object_array_t); +mca/device/device.c:838: parsec_info_object_array_init(&device->infos, &parsec_per_device_infos, device); +mca/device/device.c:839: return device->device_index; +mca/device/device.c:853: PARSEC_OBJ_DESTRUCT(&device->infos); +mca/device/device.c:855: if( NULL == device->context ) { +mca/device/device.c:859: if(device != parsec_devices[device->device_index]) { +mca/device/device.c:863: parsec_devices[device->device_index] = NULL; +mca/device/device.c:864: device->context = NULL; +mca/device/device.c:865: device->device_index = -1; +mca/device/device.c:880: if ((NULL == device) || (device->type & device_type)) +mca/device/device.c:886: tp->devices_index_mask &= ~(1 << device->device_index); +mca/device/transfer_gpu.c:222: parsec_list_item_t* item = (parsec_list_item_t*)gpu_device->gpu_mem_owned_lru.ghost_element.list_next; +mca/device/transfer_gpu.c:229: if( item == &(gpu_device->gpu_mem_owned_lru.ghost_element) ) { +mca/device/transfer_gpu.c:251: gpu_device->super.name, (void*)d2h_task, +mca/device/transfer_gpu.c:272: w2r_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; +mca/device/transfer_gpu.c:293: gpu_device->super.name, (void*)task, task->locals[0].value); 
+mca/device/transfer_gpu.c:300: gpu_device->super.transferred_data_out += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ +mca/device/transfer_gpu.c:311: gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); +mca/device/transfer_gpu.c:318: gpu_device->super.name, +mca/device/transfer_gpu.c:323: gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); +mca/device/transfer_gpu.c:324: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); +mca/device/transfer_gpu.c:330: gpu_device->data_avail_epoch++; +mca/device/cuda/device_cuda_module.c:179: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:180: int index, capability = cuda_device->major * 10 + cuda_device->minor; +mca/device/cuda/device_cuda_module.c:185: status = cudaSetDevice( cuda_device->cuda_index ); +mca/device/cuda/device_cuda_module.c:223: function_name, gpu_device->super.name); +mca/device/cuda/device_cuda_module.c:260: assert(PARSEC_DEV_CUDA == device->type); +mca/device/cuda/device_cuda_module.c:261: assert(tp->devices_index_mask & (1 << device->device_index)); +mca/device/cuda/device_cuda_module.c:267: if( chores[j].type != device->type ) +mca/device/cuda/device_cuda_module.c:286: tp->devices_index_mask &= ~(1 << device->device_index); /* drop support for this device */ +mca/device/cuda/device_cuda_module.c:288: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); +mca/device/cuda/device_cuda_module.c:358: gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:359: device = &gpu_device->super; +mca/device/cuda/device_cuda_module.c:361: cuda_device->cuda_index = (uint8_t)dev_id; +mca/device/cuda/device_cuda_module.c:362: cuda_device->major = (uint8_t)major; +mca/device/cuda/device_cuda_module.c:363: cuda_device->minor = (uint8_t)minor; +mca/device/cuda/device_cuda_module.c:364: len = asprintf(&gpu_device->super.name, "%s (%d)", 
szName, dev_id); +mca/device/cuda/device_cuda_module.c:366: gpu_device->super.name = ""; +mca/device/cuda/device_cuda_module.c:367: gpu_device->data_avail_epoch = 0; +mca/device/cuda/device_cuda_module.c:369: gpu_device->max_exec_streams = PARSEC_MAX_STREAMS; +mca/device/cuda/device_cuda_module.c:370: gpu_device->exec_stream = +mca/device/cuda/device_cuda_module.c:371: (parsec_gpu_exec_stream_t**)malloc(gpu_device->max_exec_streams * sizeof(parsec_gpu_exec_stream_t*)); +mca/device/cuda/device_cuda_module.c:375: gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)malloc(gpu_device->max_exec_streams * sizeof +mca/device/cuda/device_cuda_module.c:377: for( j = 1; j < gpu_device->max_exec_streams; j++ ) { +mca/device/cuda/device_cuda_module.c:378: gpu_device->exec_stream[j] = (parsec_gpu_exec_stream_t*)( +mca/device/cuda/device_cuda_module.c:379: (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[0] + j); +mca/device/cuda/device_cuda_module.c:381: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { +mca/device/cuda/device_cuda_module.c:382: parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; +mca/device/cuda/device_cuda_module.c:433: exec_stream->profiling = gpu_device->exec_stream[0]->profiling; +mca/device/cuda/device_cuda_module.c:444: device->type = PARSEC_DEV_CUDA; +mca/device/cuda/device_cuda_module.c:445: device->executed_tasks = 0; +mca/device/cuda/device_cuda_module.c:446: device->transferred_data_in = 0; +mca/device/cuda/device_cuda_module.c:447: device->d2d_transfer = 0; +mca/device/cuda/device_cuda_module.c:448: device->transferred_data_out = 0; +mca/device/cuda/device_cuda_module.c:449: device->required_data_in = 0; +mca/device/cuda/device_cuda_module.c:450: device->required_data_out = 0; +mca/device/cuda/device_cuda_module.c:452: device->attach = parsec_device_cuda_attach; +mca/device/cuda/device_cuda_module.c:453: device->detach = parsec_device_cuda_detach; +mca/device/cuda/device_cuda_module.c:454: 
device->memory_register = parsec_cuda_memory_register; +mca/device/cuda/device_cuda_module.c:455: device->memory_unregister = parsec_cuda_memory_unregister; +mca/device/cuda/device_cuda_module.c:456: device->taskpool_register = parsec_cuda_taskpool_register; +mca/device/cuda/device_cuda_module.c:457: device->taskpool_unregister = parsec_cuda_taskpool_unregister; +mca/device/cuda/device_cuda_module.c:458: device->data_advise = parsec_cuda_data_advise; +mca/device/cuda/device_cuda_module.c:459: device->memory_release = parsec_cuda_flush_lru; +mca/device/cuda/device_cuda_module.c:464: "the PaRSEC runtime developers", gpu_device->super.name, major, minor ); +mca/device/cuda/device_cuda_module.c:466: device->device_hweight = (float)streaming_multiprocessor * (float)hrate * (float)clockRate * 2e-3f; +mca/device/cuda/device_cuda_module.c:467: device->device_tweight = (float)streaming_multiprocessor * (float)trate * (float)clockRate * 2e-3f; +mca/device/cuda/device_cuda_module.c:468: device->device_sweight = (float)streaming_multiprocessor * (float)srate * (float)clockRate * 2e-3f; +mca/device/cuda/device_cuda_module.c:469: device->device_dweight = (float)streaming_multiprocessor * (float)drate * (float)clockRate * 2e-3f; +mca/device/cuda/device_cuda_module.c:472: PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); +mca/device/cuda/device_cuda_module.c:473: PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); +mca/device/cuda/device_cuda_module.c:474: PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); +mca/device/cuda/device_cuda_module.c:476: gpu_device->sort_starting_p = NULL; +mca/device/cuda/device_cuda_module.c:477: gpu_device->peer_access_mask = 0; /* No GPU to GPU direct transfer by default */ +mca/device/cuda/device_cuda_module.c:495: cuda_device->cuda_index, cuda_device->major, cuda_device->minor, device->name, +mca/device/cuda/device_cuda_module.c:502: device->device_dweight, device->device_sweight, device->device_tweight, 
device->device_hweight); +mca/device/cuda/device_cuda_module.c:509: if( NULL != gpu_device->exec_stream) { +mca/device/cuda/device_cuda_module.c:510: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { +mca/device/cuda/device_cuda_module.c:511: parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; +mca/device/cuda/device_cuda_module.c:541: free(gpu_device->exec_stream[0]); +mca/device/cuda/device_cuda_module.c:542: free(gpu_device->exec_stream); +mca/device/cuda/device_cuda_module.c:543: gpu_device->exec_stream = NULL; +mca/device/cuda/device_cuda_module.c:557: status = cudaSetDevice( cuda_device->cuda_index ); +mca/device/cuda/device_cuda_module.c:565: PARSEC_OBJ_DESTRUCT(&gpu_device->pending); +mca/device/cuda/device_cuda_module.c:568: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { +mca/device/cuda/device_cuda_module.c:569: parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; +mca/device/cuda/device_cuda_module.c:594: free(gpu_device->exec_stream[0]); +mca/device/cuda/device_cuda_module.c:595: free(gpu_device->exec_stream); +mca/device/cuda/device_cuda_module.c:596: gpu_device->exec_stream = NULL; +mca/device/cuda/device_cuda_module.c:598: cuda_device->cuda_index = -1; +mca/device/cuda/device_cuda_module.c:601: PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); +mca/device/cuda/device_cuda_module.c:602: PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); +mca/device/cuda/device_cuda_module.c:619: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:626: status = cudaSetDevice( cuda_device->cuda_index ); +mca/device/cuda/device_cuda_module.c:635: gpu_device->super.name, gpu_device->super.name); +mca/device/cuda/device_cuda_module.c:650: cuda_device->cuda_index, how_much_we_allocate, gpu_device->super.name, initial_free_mem); +mca/device/cuda/device_cuda_module.c:658: cuda_device->cuda_index, gpu_device->super.name); 
+mca/device/cuda/device_cuda_module.c:679: gpu_device->super.name,_free_mem, _total_mem, mem_elem_per_gpu); +mca/device/cuda/device_cuda_module.c:685: gpu_device->super.name,gpu_elem, gpu_elem->super.obj_reference_count, NULL); +mca/device/cuda/device_cuda_module.c:688: gpu_elem->device_index = gpu_device->super.device_index; +mca/device/cuda/device_cuda_module.c:693: gpu_device->super.name, gpu_elem, gpu_elem->super.obj_reference_count); +mca/device/cuda/device_cuda_module.c:694: parsec_list_push_back( &gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem ); +mca/device/cuda/device_cuda_module.c:697: if( 0 == mem_elem_per_gpu && parsec_list_is_empty( &gpu_device->gpu_mem_lru ) ) { +mca/device/cuda/device_cuda_module.c:698: parsec_warning("GPU[%s] Cannot allocate memory on GPU %s. Skip it!", gpu_device->super.name, gpu_device->super.name); +mca/device/cuda/device_cuda_module.c:703: gpu_device->super.name, mem_elem_per_gpu ); +mca/device/cuda/device_cuda_module.c:706: "GPU[%s] Allocate %u tiles on the GPU memory", gpu_device->super.name, mem_elem_per_gpu); +mca/device/cuda/device_cuda_module.c:708: if( NULL == gpu_device->memory ) { +mca/device/cuda/device_cuda_module.c:725: gpu_device->super.name, total_size); }) ); +mca/device/cuda/device_cuda_module.c:727: gpu_device->memory = zone_malloc_init( base_ptr, mem_elem_per_gpu, eltsize ); +mca/device/cuda/device_cuda_module.c:729: if( gpu_device->memory == NULL ) { +mca/device/cuda/device_cuda_module.c:731: gpu_device->super.name, gpu_device->super.name); +mca/device/cuda/device_cuda_module.c:736: gpu_device->super.name, mem_elem_per_gpu, eltsize ); +mca/device/cuda/device_cuda_module.c:739: gpu_device->mem_block_size = eltsize; +mca/device/cuda/device_cuda_module.c:740: gpu_device->mem_nb_blocks = mem_elem_per_gpu; +mca/device/cuda/device_cuda_module.c:750: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:759: gpu_device->super.name, gpu_copy, 
gpu_copy->device_private, gpu_copy->super.super +mca/device/cuda/device_cuda_module.c:762: assert( gpu_copy->device_index == cuda_device->super.super.device_index ); +mca/device/cuda/device_cuda_module.c:766: gpu_device->super.name, original->key); +mca/device/cuda/device_cuda_module.c:776: (gpu_device->exec_stream[0]->prof_event_track_enable || +mca/device/cuda/device_cuda_module.c:777: gpu_device->exec_stream[1]->prof_event_track_enable)) { +mca/device/cuda/device_cuda_module.c:778: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, +mca/device/cuda/device_cuda_module.c:780: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:782: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, +mca/device/cuda/device_cuda_module.c:785: gpu_device->super.device_index, NULL, 0); +mca/device/cuda/device_cuda_module.c:788: zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); +mca/device/cuda/device_cuda_module.c:810: parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); +mca/device/cuda/device_cuda_module.c:811: parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); +mca/device/cuda/device_cuda_module.c:813: if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { +mca/device/cuda/device_cuda_module.c:815: device->name, in_use); +mca/device/cuda/device_cuda_module.c:835: status = cudaSetDevice( cuda_device->cuda_index ); +mca/device/cuda/device_cuda_module.c:839: parsec_cuda_flush_lru(&cuda_device->super.super); +mca/device/cuda/device_cuda_module.c:842: assert( NULL != cuda_device->super.memory ); +mca/device/cuda/device_cuda_module.c:843: void* ptr = zone_malloc_fini(&cuda_device->super.memory); +mca/device/cuda/device_cuda_module.c:873: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:894: gpu_device->super.name, task_name, flow->name, i); +mca/device/cuda/device_cuda_module.c:901: gpu_elem = 
PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index); +mca/device/cuda/device_cuda_module.c:908: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:914: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:928: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:933: gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[i]); +mca/device/cuda/device_cuda_module.c:939: lru_gpu_elem = (parsec_gpu_data_copy_t*)parsec_list_pop_front(&gpu_device->gpu_mem_lru); +mca/device/cuda/device_cuda_module.c:947: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:957: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:960: parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); +mca/device/cuda/device_cuda_module.c:972: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:983: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:1001: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:1004: parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); +mca/device/cuda/device_cuda_module.c:1012: gpu_device->super.name, task_name); +mca/device/cuda/device_cuda_module.c:1031: parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); +mca/device/cuda/device_cuda_module.c:1039: gpu_device->super.name, task_name); +mca/device/cuda/device_cuda_module.c:1052: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:1076: parsec_data_copy_detach(oldmaster, lru_gpu_elem, gpu_device->super.device_index); +mca/device/cuda/device_cuda_module.c:1084: gpu_device->super.name, task_name, this_task->task_class->name, i, lru_gpu_elem, +mca/device/cuda/device_cuda_module.c:1090: gpu_device->super.name, task_name, this_task->task_class->name, i, lru_gpu_elem); +mca/device/cuda/device_cuda_module.c:1097: gpu_device->super.name, 
+mca/device/cuda/device_cuda_module.c:1102: (gpu_device->exec_stream[0]->prof_event_track_enable || +mca/device/cuda/device_cuda_module.c:1103: gpu_device->exec_stream[1]->prof_event_track_enable)) { +mca/device/cuda/device_cuda_module.c:1104: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, +mca/device/cuda/device_cuda_module.c:1106: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:1108: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, +mca/device/cuda/device_cuda_module.c:1111: gpu_device->super.device_index, NULL, 0); +mca/device/cuda/device_cuda_module.c:1115: zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); +mca/device/cuda/device_cuda_module.c:1120: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:1128: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1132: (gpu_device->exec_stream[0]->prof_event_track_enable || +mca/device/cuda/device_cuda_module.c:1133: gpu_device->exec_stream[1]->prof_event_track_enable)) { +mca/device/cuda/device_cuda_module.c:1134: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, +mca/device/cuda/device_cuda_module.c:1136: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:1148: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1151: parsec_data_copy_attach(master, gpu_elem, gpu_device->super.device_index); +mca/device/cuda/device_cuda_module.c:1158: gpu_device->super.name, task_name, +mca/device/cuda/device_cuda_module.c:1161: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); +mca/device/cuda/device_cuda_module.c:1165: gpu_device->data_avail_epoch++; +mca/device/cuda/device_cuda_module.c:1267: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; +mca/device/cuda/device_cuda_module.c:1280: gpu_device->super.name, gpu_task); +mca/device/cuda/device_cuda_module.c:1295: gpu_device->super.name, gpu_elem, 
gpu_elem->super.super.obj_reference_count, gpu_elem->readers); +mca/device/cuda/device_cuda_module.c:1302: gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count); +mca/device/cuda/device_cuda_module.c:1314: if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { +mca/device/cuda/device_cuda_module.c:1328: if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { +mca/device/cuda/device_cuda_module.c:1333: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, in_elem, original); +mca/device/cuda/device_cuda_module.c:1341: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index); +mca/device/cuda/device_cuda_module.c:1348: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); +mca/device/cuda/device_cuda_module.c:1368: gpu_device->super.name, in_elem, in_elem->super.super.obj_reference_count, original, gpu_elem, gpu_elem->super.super.obj_reference_count); +mca/device/cuda/device_cuda_module.c:1381: transfer_from = parsec_data_start_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); +mca/device/cuda/device_cuda_module.c:1387: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1401: gpu_device->super.required_data_in += original->nb_elts; +mca/device/cuda/device_cuda_module.c:1408: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1413: gpu_device->super.device_index, gpu_elem->version, (void*) +mca/device/cuda/device_cuda_module.c:1418: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1422: gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); +mca/device/cuda/device_cuda_module.c:1443: gpu_task->prof_tp_id = cuda_device->cuda_index; +mca/device/cuda/device_cuda_module.c:1454: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:1465: 
gpu_device->super.device_index, &_info, +mca/device/cuda/device_cuda_module.c:1476: in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, +mca/device/cuda/device_cuda_module.c:1481: gpu_elem->device_private, cuda_device->cuda_index, +mca/device/cuda/device_cuda_module.c:1492: gpu_device->super.transferred_data_in += nb_elts; +mca/device/cuda/device_cuda_module.c:1494: gpu_device->super.d2d_transfer += nb_elts; +mca/device/cuda/device_cuda_module.c:1496: gpu_device->super.nb_data_faults += nb_elts; +mca/device/cuda/device_cuda_module.c:1503: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1519: parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); +mca/device/cuda/device_cuda_module.c:1523: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1583: gpu_device->super.name, parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, gpu_task), +mca/device/cuda/device_cuda_module.c:1588: gpu_device->super.name, gpu_task, __FILE__, __LINE__); +mca/device/cuda/device_cuda_module.c:1627: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:1667: gpu_device->super.name, gpu_task->ec->data[0].data_in, gpu_task->ec->data[0].data_in->super.super.obj_reference_count, +mca/device/cuda/device_cuda_module.c:1669: parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); +mca/device/cuda/device_cuda_module.c:1788: assert(gpu_stream == gpu_device->exec_stream[0]); +mca/device/cuda/device_cuda_module.c:1792: gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task)); +mca/device/cuda/device_cuda_module.c:1809: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:1816: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:1824: if( PARSEC_DEV_CUDA == src_device->super.type ) { +mca/device/cuda/device_cuda_module.c:1834: om = src_device->mutex; +mca/device/cuda/device_cuda_module.c:1837: if( 
parsec_atomic_cas_int32(&src_device->mutex, 0, -1) ) +mca/device/cuda/device_cuda_module.c:1852: if( parsec_atomic_cas_int32(&src_device->mutex, om, om+1) ) +mca/device/cuda/device_cuda_module.c:1864: gpu_device->super.name, task->data[i].data_in, +mca/device/cuda/device_cuda_module.c:1865: task->data[i].data_in->super.super.obj_reference_count, src_device->super.name, +mca/device/cuda/device_cuda_module.c:1871: gpu_device->super.name, task->data[i].data_in, +mca/device/cuda/device_cuda_module.c:1875: parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); +mca/device/cuda/device_cuda_module.c:1876: src_device->data_avail_epoch++; +mca/device/cuda/device_cuda_module.c:1880: rc = parsec_atomic_cas_int32(&src_device->mutex, -1, 0); (void)rc; +mca/device/cuda/device_cuda_module.c:1886: gpu_device->super.name, src_device->super.name, task->data[i].data_in, +mca/device/cuda/device_cuda_module.c:1897: gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task), +mca/device/cuda/device_cuda_module.c:1925: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, +mca/device/cuda/device_cuda_module.c:1935: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, gtask, gtask->ec); +mca/device/cuda/device_cuda_module.c:1936: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); +mca/device/cuda/device_cuda_module.c:1991: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2059: gpu_device->super.name, (void*)task); +mca/device/cuda/device_cuda_module.c:2064: gpu_device->super.name, (void*)task->ec); +mca/device/cuda/device_cuda_module.c:2081: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2112: if( gpu_task->last_data_check_epoch == gpu_device->data_avail_epoch ) +mca/device/cuda/device_cuda_module.c:2117: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2125: gpu_device->super.name, 
+mca/device/cuda/device_cuda_module.c:2130: if( NULL != gpu_task->ec->data[0].data_in->original->device_copies[gpu_device->super.device_index] && +mca/device/cuda/device_cuda_module.c:2131: gpu_task->ec->data[0].data_in->original->owner_device == gpu_device->super.device_index ) { +mca/device/cuda/device_cuda_module.c:2135: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2137: gpu_device->super.device_index, +mca/device/cuda/device_cuda_module.c:2138: gpu_task->ec->data[0].data_in->original->device_copies[gpu_device->super.device_index]); +mca/device/cuda/device_cuda_module.c:2147: gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch; +mca/device/cuda/device_cuda_module.c:2169: gpu_device->super.name, flow->name, +mca/device/cuda/device_cuda_module.c:2180: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2231: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2263: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2274: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); +mca/device/cuda/device_cuda_module.c:2277: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); +mca/device/cuda/device_cuda_module.c:2284: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name, gpu_copy->readers); +mca/device/cuda/device_cuda_module.c:2287: assert( gpu_copy == parsec_data_get_copy(gpu_copy->original, gpu_device->super.device_index) ); +mca/device/cuda/device_cuda_module.c:2291: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); +mca/device/cuda/device_cuda_module.c:2294: gpu_device->super.required_data_out += nb_elts; +mca/device/cuda/device_cuda_module.c:2304: gpu_device->super.name, flow->name, original->key, gpu_copy, gpu_copy->super.super.obj_reference_count, +mca/device/cuda/device_cuda_module.c:2340: gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use 
datatype size */ +mca/device/cuda/device_cuda_module.c:2351: gpu_device->data_avail_epoch++; +mca/device/cuda/device_cuda_module.c:2355: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2356: parsec_task_snprintf(tmp, MAX_TASK_STRLEN, this_task), return_code, gpu_device->data_avail_epoch ); +mca/device/cuda/device_cuda_module.c:2377: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2426: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2444: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); +mca/device/cuda/device_cuda_module.c:2449: parsec_list_push_back(&gpu_device->gpu_mem_owned_lru, (parsec_list_item_t*)gpu_copy); +mca/device/cuda/device_cuda_module.c:2485: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2505: parsec_data_copy_detach(original, gpu_copy, gpu_device->super.device_index); +mca/device/cuda/device_cuda_module.c:2515: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); +mca/device/cuda/device_cuda_module.c:2524: gpu_device->data_avail_epoch++; +mca/device/cuda/device_cuda_module.c:2574: rc = gpu_device->mutex; +mca/device/cuda/device_cuda_module.c:2577: if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc+1 ) ) { +mca/device/cuda/device_cuda_module.c:2587: parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); +mca/device/cuda/device_cuda_module.c:2591: gpu_device->super.name, __FILE__, __LINE__); +mca/device/cuda/device_cuda_module.c:2599: status = cudaSetDevice( cuda_device->cuda_index ); +mca/device/cuda/device_cuda_module.c:2607: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2612: gpu_device->exec_stream[0], +mca/device/cuda/device_cuda_module.c:2623: PARSEC_PUSH_TASK(gpu_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*)progress_task); +mca/device/cuda/device_cuda_module.c:2634: exec_stream = (exec_stream + 1) % (gpu_device->max_exec_streams - 2); /* Choose an exec_stream */ 
+mca/device/cuda/device_cuda_module.c:2636: PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tExecute %s priority %d", gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2641: gpu_device->exec_stream[2+exec_stream], +mca/device/cuda/device_cuda_module.c:2667: PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tRetrieve data (if any) for %s priority %d", gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2673: gpu_device->exec_stream[1], +mca/device/cuda/device_cuda_module.c:2697: gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(gpu_device->pending) ); +mca/device/cuda/device_cuda_module.c:2700: gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; /* force at least one tour */ +mca/device/cuda/device_cuda_module.c:2701: PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tGet from shared queue %s priority %d", gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2711: gpu_device->super.name, gpu_device->mutex, pop_null); +mca/device/cuda/device_cuda_module.c:2719: gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2735: gpu_device->super.executed_tasks++; +mca/device/cuda/device_cuda_module.c:2737: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; +mca/device/cuda/device_cuda_module.c:2738: parsec_device_load[gpu_device->super.device_index] -= parsec_device_sweight[gpu_device->super.device_index]; +mca/device/cuda/device_cuda_module.c:2739: PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream,"GPU[%s]: gpu_task %p freed at %s:%d", gpu_device->super.name, +mca/device/cuda/device_cuda_module.c:2742: rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); +mca/device/cuda/device_cuda_module.c:2750: gpu_device->super.name, __FILE__, __LINE__); +parsec_internal.h:629: * @brief Device-level info +parsec_internal.h:645: * @details infos stored under this handle exist per device-stream: +data.c:97: if ( !(device->type & PARSEC_DEV_CUDA) ){ 
diff --git a/parsec/mca/device/cuda/device_cuda_internal.h b/parsec/mca/device/cuda/device_cuda_internal.h index 9431c506a..351ab52ce 100644 --- a/parsec/mca/device/cuda/device_cuda_internal.h +++ b/parsec/mca/device/cuda/device_cuda_internal.h @@ -23,6 +23,10 @@ PARSEC_DECLSPEC extern const parsec_device_module_t parsec_device_cuda_module; int parsec_cuda_module_init( int device, parsec_device_module_t** module ); int parsec_cuda_module_fini(parsec_device_module_t* device); +int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es); +int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, + parsec_task_t *task, + int starving_device_index); END_C_DECLS diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 55e34cb5b..c6eb2b6a9 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -69,8 +69,8 @@ int parsec_cuda_migrate_fini() int parsec_cuda_get_device_load(int device) { unsigned int nvml_dev; - nvmlDevice_t nvml_device; - nvmlUtilization_t nvml_utilization; + //nvmlDevice_t nvml_device; + //nvmlUtilization_t nvml_utilization; //nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); //nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); @@ -185,7 +185,7 @@ parsec_cuda_change_device( int dealer_device_index) printf("parsec_cuda_change_device: Total_Dev %d Dealer_Dev %d\n", parsec_device_cuda_enabled, dealer_device_index); - starving_device_index = find_starving_device(dealer_device_index, parsec_device_cuda_enabled); + starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) starving_device_index = dealer_device_index; @@ -247,23 +247,59 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) * positive: starving device index. 
*/ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, - parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t *migrated_gpu_task) + int starving_device_index, + parsec_gpu_task_t *migrated_gpu_task) { printf("TRIAL parsec_cuda_kernel_migrate \n"); - int starving_device_index, dealer_device_index; - parsec_device_gpu_module_t* starving_gpu_device; - - dealer_device_index = dealer_device->super.device_index; - starving_device_index = find_starving_device(dealer_device_index, parsec_device_cuda_enabled); + //int starving_device_index, dealer_device_index; + //parsec_device_gpu_module_t* starving_gpu_device; + // + //dealer_device_index = dealer_device->super.device_index; + //starving_device_index = find_starving_device(dealer_device_index); - if(starving_device_index == -1) - return -1; + //if(starving_device_index == -1) + // return -1; parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); + parsec_cuda_set_device_task(starving_device_index, 1); printf("Task migrated to device %d \n", starving_device_index); return starving_device_index; } +/** + * @brief check if there are any devices starving. If there are any starving device migrate + * half the task from the dealer device to the starving device. 
+ *
+ * @param es execution stream handed through to parsec_cuda_kernel_migrate()
+ * @param dealer_device device whose pending FIFO supplies the tasks to migrate
+ * @return -1 when find_starving_device() reports no starving device, otherwise the number of tasks migrated (may be 0)
+ */
+int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device)
+{
+    int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0;
+    int nb_migrated = 0;
+    parsec_gpu_task_t *migrated_gpu_task = NULL;
+
+    /* find_starving_device() takes the dealer's index, so resolve it before searching. */
+    dealer_device_index = dealer_device->super.device_index;
+    starving_device_index = find_starving_device(dealer_device_index);
+    if(starving_device_index == -1)
+        return -1;
+
+    dealer_task_count = parsec_cuda_get_device_task(dealer_device_index);
+
+    /* NOTE(review): do/while moves one task even when dealer_task_count / 2 is 0 -- confirm intended. */
+    do
+    {
+        migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) );
+        if(migrated_gpu_task != NULL)
+            parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task);
+        else
+            break;
+        nb_migrated++;
+    }while(nb_migrated < (dealer_task_count / 2) );
+
+    return nb_migrated; /* every path of this int function now returns a value */
+}
diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 5ccfcb067..be64ce6f2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -22,11 +22,11 @@ int parsec_cuda_get_device_task(int device); int parsec_cuda_set_device_load(int device, int load); int parsec_cuda_set_device_task(int device, int task_count); int is_starving(int device); -int find_starving_device(int dealer_device, int ndevice); +int find_starving_device(int dealer_device); parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, - parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t *migrated_gpu_task); + int starving_device_index, + parsec_gpu_task_t *migrated_gpu_task); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 11c769864..65da74f0b 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -27,6 +27,8 @@ #include #include
+#include "parsec/mca/device/cuda/device_cuda_migrate.h" + static int parsec_cuda_data_advise(parsec_device_module_t *dev, parsec_data_t *data, int advice); /** * According to @@ -2608,6 +2610,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, gpu_task), gpu_task->ec->priority ); } + + //migrate_if_starving(es, gpu_device); + rc = progress_stream( gpu_device, gpu_device->exec_stream[0], parsec_cuda_kernel_push, @@ -2690,6 +2695,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, out_task_pop = progress_task; fetch_task_from_shared_queue: + + /** + * @brief Before a new task is selectd by the device manager for execution, + * the manager checks if there are any starving devices and migrate tasks, + * to the starving device, if there are available tasks to migrate. + */ + //migrate_if_starving(es, gpu_device); + assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { parsec_gpu_sort_pending_list(gpu_device); diff --git a/parsec/parsec-submodule-config.cmake b/parsec/parsec-submodule-config.cmake new file mode 100644 index 000000000..b55561fcf --- /dev/null +++ b/parsec/parsec-submodule-config.cmake @@ -0,0 +1,103 @@ +set(PARSEC_VERSION 4.0.0) + +# Required for check_language +include(CheckLanguage) + + +####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() ####### +####### Any changes to this file will be overwritten by the next CMake run #### +####### The input file was PaRSECConfig.cmake.in ######## + +get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/" ABSOLUTE) + +macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() +endmacro() + +macro(check_required_components _NAME) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(NOT ${_NAME}_${comp}_FOUND) + 
if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + endif() + endif() + endforeach() +endmacro() + +#################################################################################### + +set_and_check(PARSEC_DIR "${PACKAGE_PREFIX_DIR}") +set_and_check(PARSEC_INCLUDE_DIRS "/home/joseph/parsec") +set_and_check(PARSEC_CMAKE_DIRS "/home/joseph/parsec/cmake_modules") +set_and_check(PARSEC_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/parsec") +set_and_check(PARSEC_BINARY_DIRS "${PACKAGE_PREFIX_DIR}") +set(PARSEC_LIBEXEC_DIRS "${PACKAGE_PREFIX_DIR}") # This is optional, may not exist in the installdir + +# Pull the dependencies +list(APPEND CMAKE_PREFIX_PATH "${PARSEC_CMAKE_DIRS}") +list(APPEND CMAKE_MODULE_PATH "${PARSEC_CMAKE_DIRS}") + +find_package(Threads) + +if(TRUE) + set_and_check(HWLOC_INCLUDE_DIR "/usr/include") + set_and_check(HWLOC_LIBRARY "/usr/lib/x86_64-linux-gnu/libhwloc.so") + find_package(HWLOC REQUIRED) +endif(TRUE) + +if(FALSE) + # Nothing exportable here, if this test succeed then PaRSEC supports OTF2 output. 
+ if( "" ) + set_and_check(OTF2_CONFIG_PATH "") + elseif("") + set_and_check(OTF2_DIR "") + elseif( "" ) + cmake_path(GET "" PARENT_PATH OTF2_CONFIG_PATH_tmp) + set_and_check(OTF2_CONFIG_PATH "${OTF2_CONFIG_PATH_tmp}") + unset(OTF2_CONFIG_PATH_tmp) + endif( "" ) + find_package(OTF2 REQUIRED) +endif(FALSE) + +if(TRUE) + set_and_check(PAPI_INCLUDE_DIR "/usr/local/include") + set_and_check(PAPI_LIBRARY "/usr/local/lib/libpapi.so") + find_package(PAPI REQUIRED) +endif(TRUE) + +if(ON) + # Try to find MPI::MPI_C + if (NOT TARGET MPI::MPI_C) + # ensure that language C is enabled + check_language(C) + if(CMAKE_C_COMPILER) + enable_language(C) + else() + message(FATAL_ERROR "Cannot find package PaRSEC due to missing C language support; either enable_language(C) in your project or ensure that C compiler can be discovered") + endif() + find_package(MPI REQUIRED COMPONENTS C) + endif(NOT TARGET MPI::MPI_C) +endif(ON) + +if(FALSE) + find_package(CUDAToolkit REQUIRED) + SET(PARSEC_HAVE_CUDA TRUE) +endif(FALSE) + +if(OFF) + # Nothing exportable here, if this test succeed then PaRSEC supports tracing +endif(OFF) + +# Pull the PaRSEC:: +if(NOT TARGET PaRSEC::parsec) + include(${CMAKE_CURRENT_LIST_DIR}/PaRSECTargets.cmake) +endif(NOT TARGET PaRSEC::parsec) + +# Populate the variables + +set(PARSEC_PTGFLAGS "$ENV{PTGFLAGS}" CACHE STRING "Flags to pass to the parsec-ptgpp executable") +set(PARSEC_PTGPP_EXECUTABLE ${PARSEC_BINARY_DIRS}/parsec-ptgpp CACHE STRING "Point to the parsec-ptgpp executable") +set(PARSEC_LIBRARIES PaRSEC::parsec CACHE STRING "List of libraries suitable for use in target_link_libraries") # for compatibility with older (non-target based) clients diff --git a/parsec/parsec/class/lifo-external.h b/parsec/parsec/class/lifo-external.h new file mode 100644 index 000000000..50f8731e7 --- /dev/null +++ b/parsec/parsec/class/lifo-external.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2019-2022 The University of Tennessee and The University + * of Tennessee Research 
Foundation. All rights + * reserved. + */ + +#ifndef LIFO_EXTERNAL_H_HAS_BEEN_INCLUDED +#define LIFO_EXTERNAL_H_HAS_BEEN_INCLUDED + +#if defined(PARSEC_ATOMIC_ACCESS_TO_INTERNALS_ALLOWED) +#error "This file should never be used while building PaRSEC internally" +#endif /* defined(PARSEC_ATOMIC_ACCESS_TO_INTERNALS_ALLOWED) */ + +#include "parsec/class/list_item.h" + +BEGIN_C_DECLS + +/** + * @brief opaque structure to hold a LIFO + */ +typedef struct parsec_lifo_opaque_s parsec_lifo_t; +PARSEC_DECLSPEC PARSEC_OBJ_CLASS_DECLARATION(parsec_lifo_t); +struct parsec_lifo_opaque_s { + parsec_object_t super; + uint8_t alignment; + union { +#if defined(PARSEC_ATOMIC_HAS_ATOMIC_CAS_INT128) + __int128_t int128_private; +#else + int64_t int64_private; +#endif /* defined(PARSEC_ATOMIC_HAS_ATOMIC_CAS_INT128) */ + char lifo_private[16]; + }; + /* IMPORTANT: + * This structure needs to be kept in sync both with the + * beginning of parsec_lifo_s ***AND*** with the structure + * in src_root/CMakeLists.txt to match both the offset of + * the lifo_private member as well as the alignment of the + * LIFO itself. Both these checks can be done during CMake. + * + * The alignment of this struct must be as restrictive as the + * alignment of the internal parsec_lifo_t, to ensure that the + * contained fields have the correct alignment even for static + * objects. + */ +}; + +/** + * @brief check if the LIFO is empty + * + * @param[inout] lifo the LIFO to check + * @return 0 if lifo is not empty, 1 otherwise +x* + * @remark this function is thread safe + */ +PARSEC_DECLSPEC int +parsec_lifo_is_empty( parsec_lifo_t* lifo ); + +/** + * @brief check if the LIFO is empty, without forcing atomicity. 
+ * + * @param[inout] lifo the LIFO to check + * @return 0 if lifo is not empty, 1 otherwise + * + * @remark this function is not thread safe + */ +PARSEC_DECLSPEC int +parsec_nolock_lifo_is_empty( parsec_lifo_t* lifo ); + +/** + * @brief Push an element in the LIFO + * + * @details push an element at the front of the LIFO + * + * @param[inout] lifo the LIFO into which to push the element + * @param[inout] item the element to push in lifo + * + * @remark this function is thread safe + */ +PARSEC_DECLSPEC void +parsec_lifo_push(parsec_lifo_t* lifo, parsec_list_item_t* item); + +/** + * @brief Push an element in the LIFO, without forcing atomicity. + * + * @details push an element at the front of the LIFO + * + * @param[inout] lifo the LIFO into which to push the element + * @param[inout] item the element to push in lifo + * + * @remark this function is not thread safe + */ +PARSEC_DECLSPEC void +parsec_lifo_nolock_push(parsec_lifo_t* lifo, parsec_list_item_t* item); + +/** + * @brief Chain a ring of elements in front of a LIFO + * + * @details Take a ring of elements (items->prev points to the last + * element in items), and push all the elements of items in + * front of the LIFO, preserving the order in items. + * + * @param[inout] lifo the LIFO into which to push the elements + * @param[inout] items the elements ring to push in front + * + * @remark this function is thread safe + */ +PARSEC_DECLSPEC void +parsec_lifo_chain(parsec_lifo_t* lifo, parsec_list_item_t* items); + +/** + * @brief Chain a ring of elements in front of a LIFO, without + * forcing atomicity. + * + * @details Take a ring of elements (items->prev points to the last + * element in items), and push all the elements of items in + * front of the LIFO, preserving the order in items. 
+ * + * @param[inout] lifo the LIFO into which to push the elements + * @param[inout] items the elements ring to push in front + * + * @remark this function is not thread safe + */ +PARSEC_DECLSPEC void +parsec_lifo_nolock_chain(parsec_lifo_t* lifo, parsec_list_item_t* items); + +/** + * @brief Pop an element from the LIFO + * + * @details Pop the first element in the LIFO + * + * @param[inout] lifo the LIFO from which to pop the element + * @return the element that was removed from the LIFO (NULL if + * the LIFO was empty) + * + * @remark this function is thread safe + */ +PARSEC_DECLSPEC parsec_list_item_t* +parsec_lifo_pop(parsec_lifo_t* lifo); + +/** + * @brief Try popping an element from the LIFO + * + * @details Try popping the first element in the LIFO + * + * @param[inout] lifo the LIFO from which to pop the element + * @return the element that was removed from the LIFO (NULL if + * the LIFO was empty) + * + * @remark this function is thread safe + */ +PARSEC_DECLSPEC parsec_list_item_t* +parsec_lifo_try_pop(parsec_lifo_t* lifo); + +/** + * @brief Pop an element from the LIFO, without forcing atomicity. + * + * @details Pop the first element in the LIFO + * + * @param[inout] lifo the LIFO from which to pop the element + * @return the element that was removed from the LIFO (NULL if + * the LIFO was empty) + * + * @remark this function is not thread safe + */ +PARSEC_DECLSPEC parsec_list_item_t* +parsec_lifo_nolock_pop(parsec_lifo_t* lifo); + +/** + * @brief Allocate a lifo item. + * + * @details Allocate an element that is correctly aligned to be + * used in the lifo. One may change the alignment of elements before + * allocating the first item in the lifo by changing lifo->alignment. + * + * @param[in] lifo the LIFO the element will be used with. + * @return The element that was allocated. + */ +PARSEC_DECLSPEC parsec_list_item_t* +parsec_lifo_item_alloc(parsec_lifo_t* lifo, size_t truesize); + +/** + * @brief Free a lifo item. 
+ * + * @details Free an item that was allocated by parsec_lifo_item_alloc. + * + * @param[inout] item the LIFO the element to free. + * + * @return none. + * + * @remarks The item must not be present in any lifo. + */ +PARSEC_DECLSPEC void +parsec_lifo_item_free(parsec_list_item_t* item); + +END_C_DECLS + +#endif /* LIFO_EXTERNAL_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/parsec/fortran/f2c_mangle.h b/parsec/parsec/fortran/f2c_mangle.h new file mode 100644 index 000000000..04456a789 --- /dev/null +++ b/parsec/parsec/fortran/f2c_mangle.h @@ -0,0 +1,16 @@ +#ifndef PARSEC_F2C_HEADER_INCLUDED +#define PARSEC_F2C_HEADER_INCLUDED + +/* Mangling for Fortran global symbols without underscores. */ +#define PARSEC_F2C_GLOBAL(name,NAME) name##_ + +/* Mangling for Fortran global symbols with underscores. */ +#define PARSEC_F2C_GLOBAL_(name,NAME) name##_ + +/* Mangling for Fortran module symbols without underscores. */ +#define PARSEC_F2C_MODULE(mod_name,name, mod_NAME,NAME) __##mod_name##_MOD_##name + +/* Mangling for Fortran module symbols with underscores. 
*/ +#define PARSEC_F2C_MODULE_(mod_name,name, mod_NAME,NAME) __##mod_name##_MOD_##name + +#endif diff --git a/parsec/parsec/include/parsec.pc b/parsec/parsec/include/parsec.pc new file mode 100644 index 000000000..812f46432 --- /dev/null +++ b/parsec/parsec/include/parsec.pc @@ -0,0 +1,13 @@ +prefix=/home/joseph/parsec/build/install +exec_prefix=${prefix}/bin +libdir=${prefix}/lib +includedir=${prefix}/include + +Name: PaRSEC +Description: Parallel Runtime Scheduling and Execution Controller +Version: 4.0.0 +Requires: hwloc +Libs: -L${libdir} -lparsec -L/usr/lib/gcc/x86_64-linux-gnu/9 -L/usr/lib/x86_64-linux-gnu -L/usr/lib -L/lib/x86_64-linux-gnu -L/lib -lgfortran -lm -lgcc_s -lgcc -lquadmath -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc -latomic /usr/local/lib/libmpi.so /usr/lib/x86_64-linux-gnu/libhwloc.so -latomic +Libs.private: +Cflags: -I${includedir} -I/usr/include -I/usr/local/include + diff --git a/parsec/parsec/include/parsec/parsec_config.h b/parsec/parsec/include/parsec/parsec_config.h new file mode 100644 index 000000000..f736db01c --- /dev/null +++ b/parsec/parsec/include/parsec/parsec_config.h @@ -0,0 +1,71 @@ +#ifndef PARSEC_CONFIG_H_HAS_BEEN_INCLUDED +#define PARSEC_CONFIG_H_HAS_BEEN_INCLUDED + +/* Compiler dependent capabilities */ +#define PARSEC_ATOMIC_USE_C11_ATOMICS +/* #undef PARSEC_ATOMIC_USE_GCC_32_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_GCC_64_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_GCC_128_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_GCC_128_OTHER_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_XLC_32_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_XLC_64_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_XLC_LLSC_32_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_XLC_LLSC_64_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_MIPOSPRO_32_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_MIPOSPRO_64_BUILTINS */ +/* #undef PARSEC_ATOMIC_USE_SUN_32 */ +/* #undef PARSEC_ATOMIC_USE_SUN_64 */ +/* #undef PARSEC_ARCH_X86 */ +#define PARSEC_ARCH_X86_64 +/* #undef PARSEC_ARCH_PPC */ + +#define 
PARSEC_HAVE_BUILTIN_EXPECT +#define PARSEC_HAVE_BUILTIN_CPU +#define PARSEC_HAVE_ATTRIBUTE_VISIBILITY +#define PARSEC_HAVE_ATTRIBUTE_ALWAYS_INLINE +#define PARSEC_HAVE_ATTRIBUTE_FORMAT_PRINTF +#define PARSEC_HAVE_ATTRIBUTE_DEPRECATED + +#define PARSEC_HAVE_PTHREAD_BARRIER +/* #undef PARSEC_HAVE_PTHREAD_BARRIER_H */ + +#define PARSEC_HAVE_THREAD_LOCAL +#define PARSEC_HAVE_PTHREAD_GETSPECIFIC + +/* Optional packages */ +#define PARSEC_HAVE_HWLOC_BITMAP +#define PARSEC_HAVE_HWLOC_PARENT_MEMBER +#define PARSEC_HAVE_HWLOC_CACHE_ATTR +#define PARSEC_HAVE_HWLOC_OBJ_PU + +/* #undef PARSEC_HAVE_RECENT_LEX */ + +#define PARSEC_PROFILING_USE_MMAP +#define PARSEC_PROFILING_USE_HELPER_THREAD + +#define PARSEC_HAVE_VALGRIND_API + +/* #undef PARSEC_HAVE_INDENT */ +#define PARSEC_INDENT_PREFIX "INDENT_EXECUTABLE-NOTFOUND" +#define PARSEC_INDENT_OPTIONS "-nbad -bap -nbc -br -brs -ncdb -ce -cli0 -d0 -di1 -nfc1 -i4 -ip0 -lp -npcs -npsl -nsc -nsob -l120" + +#define PARSEC_HAVE_AWK +#define PARSEC_AWK_PREFIX "/usr/bin/awk" + +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif /* !defined(_GNU_SOURCE) */ + +#ifdef PARSEC_ARCH_PPC +#define inline __inline__ +#define restrict +#endif + +/* We undefined the PARSEC_CONFIG_H_HAS_BEEN_INCLUDED #define so that the parsec_options.h + * can be loaded. This mechanism is only used durig the PaRSEC compilation, once installed + * the parsec_options.h will become the new parsec_config.h. 
+ */ +#undef PARSEC_CONFIG_H_HAS_BEEN_INCLUDED +#include "parsec/parsec_options.h" + +#endif /* PARSEC_CONFIG_H_HAS_BEEN_INCLUDED */ diff --git a/parsec/parsec/include/parsec/parsec_options.h b/parsec/parsec/include/parsec/parsec_options.h new file mode 100644 index 000000000..2a3dede40 --- /dev/null +++ b/parsec/parsec/include/parsec/parsec_options.h @@ -0,0 +1,140 @@ +#ifndef PARSEC_CONFIG_H_HAS_BEEN_INCLUDED +#define PARSEC_CONFIG_H_HAS_BEEN_INCLUDED + +/* This file contains the OS dependent capabilities, and should be generic for + * all compilers o a particular architecture. It is used during the PaRSEC build to + * import all OS dependent features, but once PaRSEC installed this file will + * become the parsec_config.h and will hide all compiler dependent features used + * during PaRSEC compilation. + */ +/** @brief Define the compilation date of the runtime */ +#define PARSEC_COMPILE_DATE "2022-03-30T09:11:36" +/** @brief Define the PaRSEC major version number */ +#define PARSEC_VERSION_MAJOR 4 +/** @brief Define the PaRSEC minor version number */ +#define PARSEC_VERSION_MINOR 0 +/** @brief Define the PaRSEC patch version number */ +#define PARSEC_VERSION_RELEASE 0 +/** @brief Define the branch that was compiled */ +#define PARSEC_GIT_BRANCH "gpu_migrate" +/** @brief Define the commit hash that was compiled */ +#define PARSEC_GIT_HASH "804e58084" +/** @brief Define the changes to the commit hash that was compiled */ +#define PARSEC_GIT_DIRTY "2 files changed, 4 insertions(+)" +/** @brief Define the commit date of the runtime */ +#define PARSEC_GIT_DATE "2022-03-28T10:52:17-04:00" + +/* OS dependent capabilities */ +#define PARSEC_HAVE_PTHREAD +#define PARSEC_HAVE_SCHED_SETAFFINITY +#define PARSEC_HAVE_CLOCK_GETTIME +#define PARSEC_HAVE_ASPRINTF +#define PARSEC_HAVE_VASPRINTF +#define PARSEC_HAVE_RAND_R +#define PARSEC_HAVE_RANDOM +#define PARSEC_HAVE_ERAND48 +#define PARSEC_HAVE_NRAND48 +#define PARSEC_HAVE_LRAND48 +#define PARSEC_HAVE_GETLINE +#define 
PARSEC_HAVE_SETENV +#define PARSEC_HAVE_STDARG_H +#define PARSEC_HAVE_UNISTD_H +#define PARSEC_HAVE_SYS_PARAM_H +#define PARSEC_HAVE_SYS_TYPES_H +#define PARSEC_HAVE_SYSLOG_H +#define PARSEC_HAVE_VA_COPY +/* #undef PARSEC_HAVE_UNDERSCORE_VA_COPY */ +#define PARSEC_HAVE_GETOPT_LONG +#define PARSEC_HAVE_GETRUSAGE +#define PARSEC_HAVE_RUSAGE_THREAD +#define PARSEC_HAVE_GETOPT_H +#define PARSEC_HAVE_ERRNO_H +#define PARSEC_HAVE_STDDEF_H +#define PARSEC_HAVE_STDBOOL_H +#define PARSEC_HAVE_CTYPE_H +#define PARSEC_HAVE_LIMITS_H +#define PARSEC_HAVE_STRING_H +#define PARSEC_HAVE_GEN_H +#define PARSEC_HAVE_COMPLEX_H +#define PARSEC_HAVE_EXECINFO_H +#define PARSEC_HAVE_SYS_MMAN_H +#define PARSEC_HAVE_DLFCN_H +#define PARSEC_HAVE_SYSCONF +#define PARSEC_HAVE_ATTRIBUTE_DEPRECATED + +/* Compiler Specific Options */ +#define PARSEC_ATOMIC_HAS_ATOMIC_CAS_INT128 +#define PARSEC_HAVE_INT128 + +/* Scheduling engine */ +#define PARSEC_SCHED_DEPS_MASK + +/* Communication engine */ +#define PARSEC_DIST_WITH_MPI +#define PARSEC_DIST_THREAD +#define PARSEC_DIST_PRIORITIES +#define PARSEC_DIST_COLLECTIVES +#define PARSEC_DIST_SHORT_LIMIT 1 + +/* GPU Support */ +#define PARSEC_GPU_WITH_CUDA +/* #undef PARSEC_GPU_CUDA_ALLOC_PER_TILE */ +/* #undef PARSEC_GPU_WITH_OPENCL */ +/* #undef PARSEC_HAVE_PEER_DEVICE_MEMORY_ACCESS */ + +/* debug */ +/* #undef PARSEC_DEBUG */ +/* #undef PARSEC_DEBUG_PARANOID */ +/* #undef PARSEC_DEBUG_NOISIER */ +/* #undef PARSEC_DEBUG_HISTORY */ +/* #undef PARSEC_LIFO_USE_ATOMICS */ + +/* profiling */ +/* #undef PARSEC_PROF_TRACE */ +/* #undef PARSEC_PROF_TRACE_PTG_INTERNAL_INIT */ +/* #undef PARSEC_PROF_RUSAGE_EU */ +/* #undef PARSEC_PROF_TRACE_SCHEDULING_EVENTS */ +/* #undef PARSEC_PROF_TRACE_ACTIVE_ARENA_SET */ +/* #undef PARSEC_PROF_GRAPHER */ +/* #undef PARSEC_PROF_DRY_RUN */ +/* #undef PARSEC_PROF_DRY_BODY */ +/* #undef PARSEC_PROF_DRY_DEP */ + +/* Software Defined Events through PAPI-SDE */ +/* #undef PARSEC_PAPI_SDE */ + +/* Instrumenting (PINS) */ +#define 
PARSEC_PROF_PINS + +/* Simulating */ +/* #undef PARSEC_SIM */ + +/* Configuration parameters */ +#define PARSEC_WANT_HOME_CONFIG_FILES + +/* Compiler and flags used to compile PaRSEC generated sources */ +#define CMAKE_PARSEC_C_COMPILER "/usr/bin/cc" +#define CMAKE_PARSEC_C_FLAGS "" +#define CMAKE_PARSEC_C_INCLUDES "/home/joseph/parsec/build/install/include;/usr/include;/usr/local/include" + +#define PARSEC_HAVE_HWLOC +#define PARSEC_HAVE_PAPI +/* #undef PARSEC_HAVE_CUDA */ +/* #undef PARSEC_HAVE_OPENCL */ +#define PARSEC_HAVE_MPI +#define PARSEC_HAVE_MPI_20 +#define PARSEC_HAVE_MPI_30 +#define PARSEC_HAVE_MPI_OVERTAKE +/* #undef PARSEC_HAVE_AYUDAME */ + +#define PARSEC_INSTALL_PREFIX "/home/joseph/parsec/build/install" +/* Default PATH to look for the CUDA .so files */ +#define PARSEC_LIB_CUDA_PREFIX "." +#define PARSEC_LIB_LEVE_ZERO_PREFIX "." + +#define PARSEC_SIZEOF_VOID_P 8 + +#include "parsec/parsec_config_bottom.h" + +#endif /* PARSEC_CONFIG_H_HAS_BEEN_INCLUDED */ + diff --git a/parsec/parsec/mca/mca_static_components.h b/parsec/parsec/mca/mca_static_components.h new file mode 100644 index 000000000..7d313f3f7 --- /dev/null +++ b/parsec/parsec/mca/mca_static_components.h @@ -0,0 +1,95 @@ +#ifndef _MCA_STATIC_COMPNENTS_H +#define _MCA_STATIC_COMPNENTS_H + +#ifndef MCA_REPOSITORY_C +#error This file must be included once only, and by mca_repository.c only +#endif + +#include "parsec/parsec_config.h" +#include "parsec/mca/mca.h" +#include "parsec/utils/mca_param.h" +#include "parsec/utils/output.h" +#include + +#define MCA_NB_STATIC_COMPONENTS 14 + +mca_base_component_t *pins_iterators_checker_static_component(void); +mca_base_component_t *pins_print_steals_static_component(void); +mca_base_component_t *pins_ptg_to_dtd_static_component(void); +mca_base_component_t *pins_task_granularity_static_component(void); +mca_base_component_t *sched_ap_static_component(void); +mca_base_component_t *sched_gd_static_component(void); +mca_base_component_t 
*sched_ip_static_component(void); +mca_base_component_t *sched_lfq_static_component(void); +mca_base_component_t *sched_lhq_static_component(void); +mca_base_component_t *sched_ll_static_component(void); +mca_base_component_t *sched_ltq_static_component(void); +mca_base_component_t *sched_pbq_static_component(void); +mca_base_component_t *sched_rnd_static_component(void); +mca_base_component_t *sched_spq_static_component(void); + +static mca_base_component_t *mca_static_components[MCA_NB_STATIC_COMPONENTS+1] = { NULL, }; + +static int add_static_component(mca_base_component_t *c, int p) +{ + if( NULL == c ) + return p; + assert( p < MCA_NB_STATIC_COMPONENTS ); mca_static_components[p] = c; + mca_static_components[p+1] = NULL; + return p+1; +} + +static void register_base_component(const char *cname) +{ + char *help, *ignored; + int rc; + + rc = asprintf(&help, "Default selection set of components for the %s framework " + "( means use all components that can be found)", cname); + rc = parsec_mca_param_reg_string_name("mca", cname, + help, + false, false, + NULL, &ignored); + if( 0 < rc ) { /* parameter succesfully registered */ + /* Create a synonym to facilitate the MCA params */ + (void)parsec_mca_param_reg_syn_name(rc, NULL, cname, false); + } + free(help); + rc = asprintf(&help, "Verbosity level for the %s framework (default: 0). 
" + "Valid values: -1:\"none\", 0:\"error\", 10:\"component\", 20:\"warn\", " + "40:\"info\", 60:\"trace]\", 80:\"debug\", 100:\"max]\", 0 - 100", cname); + parsec_mca_param_reg_int_name(cname, "verbose", + help, false, false, + 0, (int*)&ignored); + free(help); + (void)ignored; + (void)rc; +} + +static void mca_static_components_init(void) +{ + static int mca_static_components_inited = 0; + int p = 0; + if (mca_static_components_inited) { + return; + } + mca_static_components_inited = 1; + + register_base_component("device"); + p = add_static_component(pins_iterators_checker_static_component(), p); + p = add_static_component(pins_print_steals_static_component(), p); + p = add_static_component(pins_ptg_to_dtd_static_component(), p); + p = add_static_component(pins_task_granularity_static_component(), p); register_base_component("pins"); + p = add_static_component(sched_ap_static_component(), p); + p = add_static_component(sched_gd_static_component(), p); + p = add_static_component(sched_ip_static_component(), p); + p = add_static_component(sched_lfq_static_component(), p); + p = add_static_component(sched_lhq_static_component(), p); + p = add_static_component(sched_ll_static_component(), p); + p = add_static_component(sched_ltq_static_component(), p); + p = add_static_component(sched_pbq_static_component(), p); + p = add_static_component(sched_rnd_static_component(), p); + p = add_static_component(sched_spq_static_component(), p); register_base_component("sched"); +} + +#endif /* _MCA_STATIC_COMPNENTS_H */ diff --git a/parsec/scheduling.c b/parsec/scheduling.c index 97eb2ecce..3b904febf 100644 --- a/parsec/scheduling.c +++ b/parsec/scheduling.c @@ -26,6 +26,8 @@ #include "parsec/dictionary.h" #include "parsec/utils/backoff.h" +#include "parsec/mca/device/cuda/device_cuda_migrate.h" + #include #if defined(PARSEC_HAVE_STRING_H) #include @@ -42,6 +44,8 @@ #include #include + + static void parsec_rusage_per_es(parsec_execution_stream_t* es, bool print) { struct rusage 
current; @@ -538,7 +542,7 @@ int __parsec_context_wait( parsec_execution_stream_t* es ) } #if defined(DISTRIBUTED) - if( (1 == parsec_communication_engine_up) && + if( (1 == parsec_communication_engine_up) && (es->virtual_process[0].parsec_context->nb_nodes == 1) && PARSEC_THREAD_IS_MASTER(es) ) { /* check for remote deps completion */ @@ -554,6 +558,14 @@ int __parsec_context_wait( parsec_execution_stream_t* es ) } misses_in_a_row++; /* assume we fail to extract a task */ + /** + * @brief This function will force a thread to be a manager thread, + * if there are any tasks migrated to a particular device. + * This will also ensure that a migrated task gets priority in execution + * when compared to a new task. + */ + parsec_cuda_kernel_dequeue(es); + task = parsec_current_scheduler->module.select(es, &distance); if( task != NULL ) { diff --git a/parsec/tests/apps/stencil/loop_gen_1D b/parsec/tests/apps/stencil/loop_gen_1D new file mode 100755 index 000000000..c56ba107a --- /dev/null +++ b/parsec/tests/apps/stencil/loop_gen_1D @@ -0,0 +1,16 @@ +#!/bin/sh + +if [ $# -ne 1 ] + then + echo "$0 Radius" + exit + fi + +RADIUS=$1 +echo " OUT(i,j) = WEIGHT_1D(0)*IN(i,j)" > loop_body_1D.in +jj=1 +while [ $jj -le $RADIUS ]; do + echo " +WEIGHT_1D(-$jj)*IN(i,j-$jj)+WEIGHT_1D($jj)*IN(i,j+$jj)" >> loop_body_1D.in + jj=`expr $jj + 1` +done +echo " ;" >> loop_body_1D.in From 89e94826217bc00ba0850bf5d819eab4f5dc2af3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 23 Apr 2022 06:03:21 +1000 Subject: [PATCH 014/215] Increment the gpu task count when a task is added to the qpu queue. Decrement the gpu task count when a task is executed by the gpu. Decrement the task at the dealer gpu when task is migrated. Increment the task at the straving gpu when the migrated task is recieved. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 23 ++++++-------------- parsec/mca/device/cuda/device_cuda_module.c | 11 ++++++++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c6eb2b6a9..c3d7b48a1 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -182,16 +182,14 @@ parsec_cuda_change_device( int dealer_device_index) int starving_device_index; parsec_device_gpu_module_t* starving_gpu_device; - printf("parsec_cuda_change_device: Total_Dev %d Dealer_Dev %d\n", - parsec_device_cuda_enabled, dealer_device_index); - starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) starving_device_index = dealer_device_index; starving_gpu_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(starving_device_index); - printf(" Starving_dev %d \n", starving_device_index); + printf(" parsec_cuda_change_device: Total_Dev %d Dealer_Dev %d Starving_dev %d \n", + parsec_device_cuda_enabled, dealer_device_index, starving_device_index); return starving_gpu_device; } @@ -250,17 +248,6 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, int starving_device_index, parsec_gpu_task_t *migrated_gpu_task) { - printf("TRIAL parsec_cuda_kernel_migrate \n"); - - //int starving_device_index, dealer_device_index; - //parsec_device_gpu_module_t* starving_gpu_device; - // - //dealer_device_index = dealer_device->super.device_index; - //starving_device_index = find_starving_device(dealer_device_index); - - //if(starving_device_index == -1) - // return -1; - parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); parsec_cuda_set_device_task(starving_device_index, 1); printf("Task migrated to device %d \n", starving_device_index); @@ -294,7 +281,11 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module 
{ migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); if(migrated_gpu_task != NULL) - parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + { + parsec_cuda_set_device_load(dealer_device_index, -1); // decrement task count at the dealer device + parsec_cuda_set_device_load(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + } else break; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 65da74f0b..c3581fb98 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2585,6 +2585,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, nanosleep(&delay, NULL); } } + + parsec_cuda_set_device_load(gpu_device->super.device_index, 1); // increment task count for this device + if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); return PARSEC_HOOK_RETURN_ASYNC; @@ -2642,6 +2645,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_task_snprintf(tmp, MAX_TASK_STRLEN, gpu_task->ec), gpu_task->ec->priority ); } + + /** + * @brief decrement the task count for the device. The decrement is done + * immediatly befor the execution of the task. + * TODO: Should this be moved to when the task is completed? 
+ */ + parsec_cuda_set_device_load(gpu_device->super.device_index, -1); + rc = progress_stream( gpu_device, gpu_device->exec_stream[2+exec_stream], NULL, From 7fd3be67e4df47d90ea0f2f2d3d330c884e977d1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Apr 2022 01:28:09 +1000 Subject: [PATCH 015/215] Testcase HelloWorldCuda.jdf added --- tests/dsl/ptg/cuda/CMakeLists.txt | 5 + tests/dsl/ptg/cuda/HelloWorldCuda.jdf | 128 ++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 tests/dsl/ptg/cuda/HelloWorldCuda.jdf diff --git a/tests/dsl/ptg/cuda/CMakeLists.txt b/tests/dsl/ptg/cuda/CMakeLists.txt index 65a6097b1..19cc68d02 100644 --- a/tests/dsl/ptg/cuda/CMakeLists.txt +++ b/tests/dsl/ptg/cuda/CMakeLists.txt @@ -19,6 +19,11 @@ if(PARSEC_HAVE_CUDA) target_ptg_sources(stage PRIVATE "stage_custom.jdf") target_link_libraries(stage PRIVATE CUDA::cublas) + parsec_addtest_executable(C HelloWorldCuda ) + target_include_directories(HelloWorldCuda PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) + target_ptg_sources(HelloWorldCuda PRIVATE "HelloWorldCuda.jdf") + target_link_libraries(HelloWorldCuda PRIVATE CUDA::cublas) + endif( NOT TARGET CUDA::cublas ) # Testing for geting best device diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf new file mode 100644 index 000000000..1dc21c348 --- /dev/null +++ b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf @@ -0,0 +1,128 @@ +extern "C" %{ + +#include "parsec.h" +#include + +#include "parsec/parsec_config.h" +#include "parsec/utils/mca_param.h" + + + +#include "parsec/mca/device/cuda/device_cuda_internal.h" +#include "parsec/data_distribution.h" +#include "parsec/data_dist/matrix/matrix.h" +#include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "parsec/execution_stream.h" +#include "parsec/class/info.h" + +#if defined(PARSEC_HAVE_CUDA) +#include "parsec/mca/device/cuda/device_cuda_internal.h" +#include +#endif /* defined(PARSEC_HAVE_CUDA) */ + + +%} + 
+HelloWorldCuda(k) + +k = 0 .. 0 + +: taskdist( k ) + +WRITE A <- NEW + +BODY [type=CUDA weight=1] +{ + fprintf(stderr, "HelloWorldCuda on GPU \n"); +} +END + +BODY +{ + fprintf(stderr, "HelloWorldCuda is running on a CPU, which is not the intended behavior \n"); +} +END + +extern "C" %{ + +static uint32_t +rank_of(parsec_data_collection_t *desc, ...) +{ + (void)desc; + return 0; +} + +static int32_t +vpid_of(parsec_data_collection_t *desc, ...) +{ + (void)desc; + return 0; +} + +static parsec_data_key_t +data_key(parsec_data_collection_t *desc, ...) +{ + int k; + va_list ap; + (void)desc; + va_start(ap, desc); + k = va_arg(ap, int); + va_end(ap); + return (uint64_t)k; +} + +int main(int argc, char *argv[]) +{ + parsec_context_t* parsec; + int rc; + int rank, world; + parsec_data_collection_t taskdist; + parsec_HelloWorldCuda_taskpool_t *tp; + +#if defined(PARSEC_HAVE_MPI) + { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided); + } + MPI_Comm_size(MPI_COMM_WORLD, &world); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#else + world = 1; + rank = 0; +#endif + + parsec = parsec_init(-1, &argc, &argv); + + + parsec_data_collection_init(&taskdist, world, rank); + taskdist.rank_of = rank_of; + taskdist.vpid_of = vpid_of; + taskdist.data_key = data_key; + + tp = parsec_HelloWorldCuda_new(&taskdist); + + //parsec_add2arena( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], + // parsec_datatype_double_complex_t, + // PARSEC_MATRIX_FULL, 1, 10, 10, 10, + // PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_arena_datatype_construct( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], + sizeof(int), PARSEC_ARENA_ALIGNMENT_SSE, + parsec_datatype_int_t ); + + rc = parsec_context_add_taskpool( parsec, &tp->super ); + rc = parsec_context_start(parsec); + rc = parsec_context_wait(parsec); + + parsec_taskpool_free(&tp->super); + parsec_data_collection_destroy(&taskdist); + + parsec_fini(&parsec); +#if defined(PARSEC_HAVE_MPI) + 
MPI_Finalize(); +#endif + + return 0; +} + +%} From 0e983720ffa6a031ec55d23b4407839e38a22fb1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 25 Apr 2022 14:33:33 -0400 Subject: [PATCH 016/215] PARSEC_HAVE_CUDA_NVML is used to check if the cuda nvml library exists. --- parsec/mca/device/cuda/device_cuda_migrate.c | 31 +++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c3d7b48a1..057bc8ca9 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -19,7 +19,9 @@ int parsec_cuda_migrate_init(int ndevices) { int i; cudaError_t cudastatus; - //nvmlReturn_t nvml_ret; + #if defined(PARSEC_HAVE_CUDA_NVML) + nvmlReturn_t nvml_ret; + #endif NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); @@ -32,7 +34,9 @@ int parsec_cuda_migrate_init(int ndevices) migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } - //nvml_ret = nvmlInit_v2(); + #if defined(PARSEC_HAVE_CUDA_NVML) + nvml_ret = nvmlInit_v2(); + #endif return 0; @@ -43,7 +47,9 @@ int parsec_cuda_migrate_fini() int i; free(device_info); - //nvmlShutdown(); + #if defined(PARSEC_HAVE_CUDA_NVML) + nvmlShutdown(); + #endif for(i = 0; i < NDEVICES; i++) { @@ -69,14 +75,19 @@ int parsec_cuda_migrate_fini() int parsec_cuda_get_device_load(int device) { unsigned int nvml_dev; - //nvmlDevice_t nvml_device; - //nvmlUtilization_t nvml_utilization; + + #if defined(PARSEC_HAVE_CUDA_NVML) + nvmlDevice_t nvml_device; + nvmlUtilization_t nvml_utilization; - //nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); - //nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); - //device_info[device].load = nvml_utilization.gpu; -// - //printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); + nvmlDeviceGetHandleByIndex_v2(device, 
&nvml_device); + nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); + device_info[device].load = nvml_utilization.gpu; + + printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); + #else + device_info[device].load = device_info[device].task_count; + #endif /* PARSEC_HAVE_CUDA_NVML */ return device_info[device].load; From 80149d0ed5b8d4e878321b83a6b3d95741b0ee50 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 25 Apr 2022 17:41:58 -0400 Subject: [PATCH 017/215] NVML library linked using cmake --- parsec/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index 6aca9788f..d2e26eb42 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -229,6 +229,7 @@ if( BUILD_PARSEC ) $<$:OTF2::OTF2> $<$:MPI::MPI_C> $<$:CUDA::cudart> + $<$:CUDA::nvml> ${EXTRA_LIBS} INTERFACE ${PARSEC_ATOMIC_SUPPORT_LIBS} From 161bb686ea3f327d14885686413e925b91695464 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 25 Apr 2022 17:43:57 -0400 Subject: [PATCH 018/215] PARSEC_HAVE_CUDA_NVML check changed to PARSEC_HAVE_CUDA. if cuda is available, the cuda nvml library will be available along with it. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 057bc8ca9..9dc965827 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -19,7 +19,7 @@ int parsec_cuda_migrate_init(int ndevices) { int i; cudaError_t cudastatus; - #if defined(PARSEC_HAVE_CUDA_NVML) + #if defined(PARSEC_HAVE_CUDA) nvmlReturn_t nvml_ret; #endif @@ -34,7 +34,7 @@ int parsec_cuda_migrate_init(int ndevices) migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } - #if defined(PARSEC_HAVE_CUDA_NVML) + #if defined(PARSEC_HAVE_CUDA) nvml_ret = nvmlInit_v2(); #endif @@ -47,7 +47,7 @@ int parsec_cuda_migrate_fini() int i; free(device_info); - #if defined(PARSEC_HAVE_CUDA_NVML) + #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); #endif @@ -76,9 +76,10 @@ int parsec_cuda_get_device_load(int device) { unsigned int nvml_dev; - #if defined(PARSEC_HAVE_CUDA_NVML) + #if defined(PARSEC_HAVE_CUDA) nvmlDevice_t nvml_device; nvmlUtilization_t nvml_utilization; + nvmlReturn_t nvml_ret; nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); @@ -87,7 +88,7 @@ int parsec_cuda_get_device_load(int device) printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); #else device_info[device].load = device_info[device].task_count; - #endif /* PARSEC_HAVE_CUDA_NVML */ + #endif /* PARSEC_HAVE_CUDA */ return device_info[device].load; From b32f4a7db26857e8ea5567aeddd09ce95a4f22fd Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Apr 2022 22:55:45 +1000 Subject: [PATCH 019/215] NVML header file added --- parsec/mca/device/cuda/device_cuda_migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h 
b/parsec/mca/device/cuda/device_cuda_migrate.h index be64ce6f2..119ed1364 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -8,7 +8,7 @@ #include "parsec/scheduling.h" #include #include - +#include typedef struct parsec_device_cuda_info_s { int task_count; From 387c1635554b0db8270ddbf83b52e81325f93ba1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Apr 2022 08:03:01 +1000 Subject: [PATCH 020/215] testing first stage protocol --- .../mca/device/cuda/device_cuda_component.c | 6 ++ parsec/mca/device/cuda/device_cuda_migrate.c | 100 ++++++++++++++---- parsec/mca/device/cuda/device_cuda_migrate.h | 5 + parsec/mca/device/cuda/device_cuda_module.c | 19 +++- parsec/mca/device/device.c | 1 + tests/dsl/ptg/cuda/HelloWorldCuda.jdf | 58 ++++++++-- 6 files changed, 161 insertions(+), 28 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 22f05d83a..ffa5de4f0 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -26,6 +26,8 @@ #include #include +#include "parsec/mca/device/cuda/device_cuda_migrate.h" + PARSEC_OBJ_CLASS_INSTANCE(parsec_device_cuda_module_t, parsec_device_module_t, NULL, NULL); static int device_cuda_component_open(void); @@ -252,6 +254,8 @@ static int device_cuda_component_open(void) return MCA_ERROR; } + parsec_cuda_migrate_init(parsec_device_cuda_enabled); + return MCA_SUCCESS; } @@ -271,6 +275,8 @@ static int device_cuda_component_close(void) return MCA_SUCCESS; } + parsec_cuda_migrate_fini(); + for( i = 0; NULL != (cdev = (parsec_device_cuda_module_t*)parsec_device_cuda_component.modules[i]); i++ ) { parsec_device_cuda_component.modules[i] = NULL; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9dc965827..5d16d0734 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -2,7 +2,7 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" extern int parsec_device_cuda_enabled; -static parsec_device_cuda_info_t* device_info; +parsec_device_cuda_info_t* device_info; static parsec_list_t** migrated_task_list; static int NDEVICES; @@ -38,6 +38,8 @@ int parsec_cuda_migrate_init(int ndevices) nvml_ret = nvmlInit_v2(); #endif + printf("Migration module initialised for %d devices \n", NDEVICES); + return 0; } @@ -46,7 +48,8 @@ int parsec_cuda_migrate_fini() { int i; - free(device_info); + + #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); #endif @@ -55,7 +58,12 @@ int parsec_cuda_migrate_fini() { PARSEC_OBJ_RELEASE(migrated_task_list[i]); } - free(migrated_task_list); + //free(migrated_task_list); + + //free(device_info); + + + printf("Migration module shut down \n"); return 0; @@ -129,6 +137,9 @@ int parsec_cuda_set_device_load(int device, int load) int parsec_cuda_set_device_task(int device, int task_count) { + printf("Device %d: Current load %d, new load %d \n", + device, device_info[device].task_count, device_info[device].task_count+task_count); + device_info[device].task_count += task_count; return device_info[device].task_count; } @@ -162,15 +173,14 @@ int is_starving(int device) int find_starving_device(int dealer_device) { int i; - printf(" find_starving_device: Total_Dev %d Dealer_Dev %d\n", NDEVICES, dealer_device); // 0 device is the CPU, 1 is recursive - for(i = 2; i < (2 + NDEVICES); i++) + for(i = 0; i < NDEVICES; i++) { - printf("Trying_Dev %d Dealer_Dev %d\n", i, dealer_device); if( i == dealer_device) continue; + //printf("Find_starving_device: Total_Dev %d Dealer_Dev %d starving device %d\n", NDEVICES, dealer_device, i); //if(is_starving(i)) return i; } @@ -215,6 +225,8 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, parsec_list_t* li = migrated_task_list[starving_device_index]; parsec_list_chain_sorted(li, (parsec_list_item_t*) task, 
parsec_execution_context_priority_comparator); + printf("Migrated task enqueued to the recieved_task_queue of device %d \n", starving_device_index); + return 0; } @@ -245,8 +257,12 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) break; } - if(task != NULL) - parsec_cuda_kernel_scheduler(es, task, i); + if(task != NULL) + { + + printf("Migrated task dequeued from the recieved_task_queue of device %d and scheduled to the device %d\n", i, i); + parsec_cuda_kernel_scheduler(es, task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 0 */ + } } /** @@ -262,7 +278,6 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, { parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); parsec_cuda_set_device_task(starving_device_index, 1); - printf("Task migrated to device %d \n", starving_device_index); return starving_device_index; } @@ -279,30 +294,77 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) { int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; - int half = 0; + int half = 0, nb_migrated = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; + if(dealer_device->mutex < 3) // make sure dealer does not starve + return -1; + + //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); + dealer_device_index = dealer_device->super.device_index; starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return -1; - dealer_device_index = dealer_device->super.device_index; - dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); - - do - { + //do + //{ migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); if(migrated_gpu_task != NULL) { + nb_migrated++; parsec_cuda_set_device_load(dealer_device_index, -1); // decrement task count at the 
dealer device parsec_cuda_set_device_load(starving_device_index, 1); // increment task count at the starving device parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); } - else - break; + //else + // break; + + //half++; + //}while(half < (dealer_task_count / 2) ); + + //if(nb_migrated > 0) + // printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); + + return nb_migrated; +} + + + +/** + * @brief Tasks is migrated immediatly. + * Mainly used for validating migration protocol. + * + * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int + */ + +int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, + parsec_gpu_task_t* migrated_gpu_task) +{ + int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; + int half = 0; + + starving_device_index = find_starving_device(dealer_device_index); + if(starving_device_index == -1) + return -1; + + dealer_device_index = dealer_device->super.device_index; + + + if(migrated_gpu_task != NULL) + { + parsec_cuda_set_device_load(dealer_device_index, -1); // decrement task count at the dealer device + parsec_cuda_set_device_load(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + return 1; + } + + return 0; - half++; - }while(half < (dealer_task_count / 2) ); + } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 119ed1364..010fd9942 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -17,6 +17,7 @@ typedef struct parsec_device_cuda_info_s { } parsec_device_cuda_info_t; int parsec_cuda_migrate_init(int ndevices); +int 
parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); int parsec_cuda_get_device_task(int device); int parsec_cuda_set_device_load(int device, int load); @@ -27,6 +28,10 @@ parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, int starving_device_index, parsec_gpu_task_t *migrated_gpu_task); +int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, + parsec_gpu_task_t* migrated_gpu_task); +int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); + #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index c3581fb98..78b21c0fb 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2540,6 +2540,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_gpu_task_t *gpu_task, int which_gpu ) { + //which_gpu = 2; //CHANGE THIS, only for testing. 
device 0 is cpu, device 1 is recursive, device 2 is the first cuda gpu + //printf("Which_gpu = %d \n", which_gpu-2); + parsec_device_gpu_module_t* gpu_device; parsec_device_cuda_module_t *cuda_device; cudaError_t status; @@ -2614,7 +2617,15 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->priority ); } - //migrate_if_starving(es, gpu_device); + //if(which_gpu == 2) //CHANGE THIS, only for testing + //{ + // printf("Immediatly Migrate task from device %d \n", which_gpu-2); + // if(migrate_immediate(es, gpu_device, gpu_task)) + // { + // rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); + // return PARSEC_HOOK_RETURN_ASYNC; + // } + //} rc = progress_stream( gpu_device, gpu_device->exec_stream[0], @@ -2712,7 +2723,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * the manager checks if there are any starving devices and migrate tasks, * to the starving device, if there are available tasks to migrate. */ - //migrate_if_starving(es, gpu_device); + printf("Available tasks %d \n", gpu_device->mutex); + rc = migrate_if_starving(es, gpu_device); + if( rc > 0) + parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); + //parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index e569e98e4..dccb8fce0 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -168,6 +168,7 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } + dev_index = 2; //CHANGE THIS, only for testing all task mapped to first GPU device return dev_index; } diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf index 1dc21c348..015a7549c 100644 --- a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf +++ 
b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf @@ -1,6 +1,7 @@ extern "C" %{ #include "parsec.h" +#include #include #include "parsec/parsec_config.h" @@ -23,23 +24,55 @@ extern "C" %{ %} -HelloWorldCuda(k) +NT [ type="int" ] +NL [ type="int" ] + + +Start(k) k = 0 .. 0 : taskdist( k ) -WRITE A <- NEW +WRITE A <- NEW + -> A HelloWorldCuda( 1 .. NT ) + +BODY +{ + printf("Start on the CPU \n"); +} +END + + + +HelloWorldCuda(k) + +k = 1 .. NT + +: taskdist( k ) + +READ A <- A Start( 0 ) BODY [type=CUDA weight=1] { - fprintf(stderr, "HelloWorldCuda on GPU \n"); + int i, j, a[100]; + for( i = 0; i < NL; i++ ) + for( j = 1; j < 100; j++ ) + a[i] = a[i] + a[i-1]; + + + printf("HelloWorldCuda on GPU \n"); } END BODY { - fprintf(stderr, "HelloWorldCuda is running on a CPU, which is not the intended behavior \n"); + int i, j, a[100]; + for( i = 0; i < NL; i++ ) + for( j = 1; j < 100; j++ ) + a[i] = a[i] + a[i-1]; + + printf("HelloWorldCuda is running on a CPU, which is not the intended behavior \n"); } END @@ -74,11 +107,22 @@ data_key(parsec_data_collection_t *desc, ...) int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rc; - int rank, world; + int rc, rank, world, NT = 10, NL = 1; parsec_data_collection_t taskdist; parsec_HelloWorldCuda_taskpool_t *tp; + if(argc < 3) + { + printf("Usasge example: HelloWorldCuda [#tasks] [#loops per task]"); + exit(0); + } + else + { + NT = atoi(argv[1]); + NL = atoi(argv[2]); + } + + #if defined(PARSEC_HAVE_MPI) { int provided; @@ -99,7 +143,7 @@ int main(int argc, char *argv[]) taskdist.vpid_of = vpid_of; taskdist.data_key = data_key; - tp = parsec_HelloWorldCuda_new(&taskdist); + tp = parsec_HelloWorldCuda_new(&taskdist, NT, NL); //parsec_add2arena( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], // parsec_datatype_double_complex_t, From c50a0462c239f353c06303272127c0e71e0d2d49 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Apr 2022 09:05:54 +1000 Subject: [PATCH 021/215] task counting corrected. 
parsec_cuda_set_device_load() was used instead of parsec_cuda_set_device_task() --- parsec/mca/device/cuda/device_cuda_module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 78b21c0fb..bc025143c 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2589,7 +2589,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - parsec_cuda_set_device_load(gpu_device->super.device_index, 1); // increment task count for this device + parsec_cuda_set_device_task(gpu_device->super.device_index, 1); // increment task count for this device if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); @@ -2662,7 +2662,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * immediatly befor the execution of the task. * TODO: Should this be moved to when the task is completed? */ - parsec_cuda_set_device_load(gpu_device->super.device_index, -1); + parsec_cuda_set_device_task(gpu_device->super.device_index, -1); rc = progress_stream( gpu_device, gpu_device->exec_stream[2+exec_stream], From 1e9df02df47e711952f9665f5d3ba4ecce5fff09 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Apr 2022 09:14:00 +1000 Subject: [PATCH 022/215] device index corrected. 
We were using dealer_device->super.device_index, while the correct index was dealer_device->super.device_index-2 --- parsec/mca/device/cuda/device_cuda_migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 5d16d0734..4415887f4 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -297,11 +297,12 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module int half = 0, nb_migrated = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; - if(dealer_device->mutex < 3) // make sure dealer does not starve + //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); + dealer_device_index = dealer_device->super.device_index - 2; + + if(parsec_cuda_get_device_task(dealer_device_index) < 3) // make sure dealer does not starve return -1; - //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); - dealer_device_index = dealer_device->super.device_index; starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return -1; @@ -351,7 +352,7 @@ int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t if(starving_device_index == -1) return -1; - dealer_device_index = dealer_device->super.device_index; + dealer_device_index = dealer_device->super.device_index - 2; if(migrated_gpu_task != NULL) From 02acc55d78f885bc16e8b9d26ff0aaffca5b3689 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Apr 2022 12:15:20 +1000 Subject: [PATCH 023/215] Task counting in each device corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 31 ++++++++------------ parsec/mca/device/cuda/device_cuda_migrate.h | 2 ++ parsec/mca/device/cuda/device_cuda_module.c | 27 +++++++++-------- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c 
b/parsec/mca/device/cuda/device_cuda_migrate.c index 4415887f4..8f7aa923a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -124,8 +124,8 @@ int parsec_cuda_get_device_task(int device) int parsec_cuda_set_device_load(int device, int load) { - device_info[device].load += load; - return device_info[device].load; + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); + return rc+load; } /** @@ -137,11 +137,9 @@ int parsec_cuda_set_device_load(int device, int load) int parsec_cuda_set_device_task(int device, int task_count) { - printf("Device %d: Current load %d, new load %d \n", - device, device_info[device].task_count, device_info[device].task_count+task_count); - - device_info[device].task_count += task_count; - return device_info[device].task_count; + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].task_count), task_count); + //printf("Device %d: Prev load %d, Current load = %d \n", device, rc, rc+task_count); + return rc+task_count; } /** @@ -277,8 +275,6 @@ int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, parsec_gpu_task_t *migrated_gpu_task) { parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); - parsec_cuda_set_device_task(starving_device_index, 1); - return starving_device_index; } @@ -298,7 +294,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_gpu_task_t *migrated_gpu_task = NULL; //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); - dealer_device_index = dealer_device->super.device_index - 2; + dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); if(parsec_cuda_get_device_task(dealer_device_index) < 3) // make sure dealer does not starve return -1; @@ -313,8 +309,8 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module if(migrated_gpu_task != NULL) { nb_migrated++; - 
parsec_cuda_set_device_load(dealer_device_index, -1); // decrement task count at the dealer device - parsec_cuda_set_device_load(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device + parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); } @@ -345,20 +341,17 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, parsec_gpu_task_t* migrated_gpu_task) { - int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; - int half = 0; + int starving_device_index = -1, dealer_device_index = 0; + dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return -1; - dealer_device_index = dealer_device->super.device_index - 2; - - if(migrated_gpu_task != NULL) { - parsec_cuda_set_device_load(dealer_device_index, -1); // decrement task count at the dealer device - parsec_cuda_set_device_load(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device + parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); return 1; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 010fd9942..544841b3e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -10,6 +10,8 @@ #include #include +#define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) + typedef struct parsec_device_cuda_info_s { int task_count; int load; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index bc025143c..32ff38f7a 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2589,7 +2589,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - parsec_cuda_set_device_task(gpu_device->super.device_index, 1); // increment task count for this device + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1); // increment task count for this device if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); @@ -2657,13 +2657,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->priority ); } - /** - * @brief decrement the task count for the device. The decrement is done - * immediatly befor the execution of the task. - * TODO: Should this be moved to when the task is completed? - */ - parsec_cuda_set_device_task(gpu_device->super.device_index, -1); - rc = progress_stream( gpu_device, gpu_device->exec_stream[2+exec_stream], NULL, @@ -2723,11 +2716,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * the manager checks if there are any starving devices and migrate tasks, * to the starving device, if there are available tasks to migrate. 
*/ - printf("Available tasks %d \n", gpu_device->mutex); - rc = migrate_if_starving(es, gpu_device); - if( rc > 0) - parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); - //parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); + //printf("Available tasks %d \n", gpu_device->mutex); + //rc = migrate_if_starving(es, gpu_device); + //if( rc > 0) + // parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); + // //parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { @@ -2772,6 +2765,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_cuda_kernel_epilog( gpu_device, gpu_task ); __parsec_complete_execution( es, gpu_task->ec ); gpu_device->super.executed_tasks++; + + /** + * @brief decrement the task count for the device. The decrement is done + * immediatly befor the execution of the task. + * TODO: Should this be moved to when the task is completed? 
+ */ + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); + remove_gpu_task: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; parsec_device_load[gpu_device->super.device_index] -= parsec_device_sweight[gpu_device->super.device_index]; From 41ba480451855a238e3f42a828ed3a9ffc8727a2 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Apr 2022 12:25:13 +1000 Subject: [PATCH 024/215] parsec_cuda_migrate_fini() updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 8f7aa923a..e4f2695de 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -47,8 +47,6 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini() { int i; - - #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); @@ -58,10 +56,8 @@ int parsec_cuda_migrate_fini() { PARSEC_OBJ_RELEASE(migrated_task_list[i]); } - //free(migrated_task_list); - - //free(device_info); - + free(migrated_task_list); + free(device_info); printf("Migration module shut down \n"); From 2ab276f1ba499ae12f59c954c040c3bfcf241c53 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Apr 2022 00:50:16 +1000 Subject: [PATCH 025/215] test first stage migration --- parsec/mca/device/cuda/device_cuda_migrate.c | 5 ++--- parsec/mca/device/cuda/device_cuda_module.c | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e4f2695de..b1b0fcda7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -291,13 +291,12 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); 
dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if(parsec_cuda_get_device_task(dealer_device_index) < 3) // make sure dealer does not starve - return -1; + return 0; starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) - return -1; + return 0; //do //{ diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 32ff38f7a..c28d0a5aa 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2717,10 +2717,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * to the starving device, if there are available tasks to migrate. */ //printf("Available tasks %d \n", gpu_device->mutex); - //rc = migrate_if_starving(es, gpu_device); - //if( rc > 0) - // parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); - // //parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); + rc = migrate_if_starving(es, gpu_device); + if( rc > 0) + parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { From bfbe906357d405d8b2354a23a89cf10b3e1848ee Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Apr 2022 04:28:15 +1000 Subject: [PATCH 026/215] Tasks are made singletons when enqueued and dequeued --- parsec/mca/device/cuda/device_cuda_migrate.c | 21 ++++++++++++-------- tests/dsl/dtd/dtd_bench_simple_gemm.c | 2 +- tests/dsl/ptg/cuda/HelloWorldCuda.jdf | 6 +++--- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b1b0fcda7..c247593b8 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -171,7 +171,7 @@ int find_starving_device(int dealer_device) // 0 device is the CPU, 1 is recursive for(i = 0; i < NDEVICES; i++) { 
- if( i == dealer_device) + if( i == dealer_device || i == 0) continue; //printf("Find_starving_device: Total_Dev %d Dealer_Dev %d starving device %d\n", NDEVICES, dealer_device, i); @@ -216,11 +216,12 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, parsec_task_t *task, int starving_device_index) { + char tmp[MAX_TASK_STRLEN]; parsec_list_t* li = migrated_task_list[starving_device_index]; parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); - - printf("Migrated task enqueued to the recieved_task_queue of device %d \n", starving_device_index); - + //printf("Enqueue task %s to %d\n",parsec_task_snprintf(tmp, MAX_TASK_STRLEN, task), starving_device_index); + printf("Enqueue Task(%d) from %d\n", task->locals[0].value); + //printf("Enqueue task to %d\n", starving_device_index); return 0; } @@ -240,6 +241,7 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) { int i; + char tmp[128]; parsec_task_t * task = NULL; parsec_list_t* li = NULL; @@ -253,9 +255,10 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) if(task != NULL) { - - printf("Migrated task dequeued from the recieved_task_queue of device %d and scheduled to the device %d\n", i, i); - parsec_cuda_kernel_scheduler(es, task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 0 */ + PARSEC_LIST_ITEM_SINGLETON(task); + //printf("Dequeue task %s from %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, task), i); + printf("Dequeue Task(%d) from %d\n", task->locals[0].value , i); + parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ } } @@ -303,11 +306,13 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); 
if(migrated_gpu_task != NULL) { + + PARSEC_LIST_ITEM_SINGLETON(migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device + printf("Task(%d) migrated from device %d to device %d: %d \n", ((parsec_task_t *) migrated_gpu_task)->locals[0].value, dealer_device_index, starving_device_index, nb_migrated); parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); - printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); } //else // break; diff --git a/tests/dsl/dtd/dtd_bench_simple_gemm.c b/tests/dsl/dtd/dtd_bench_simple_gemm.c index b0f000999..938287d11 100644 --- a/tests/dsl/dtd/dtd_bench_simple_gemm.c +++ b/tests/dsl/dtd/dtd_bench_simple_gemm.c @@ -8,7 +8,7 @@ #if BLAS_WITH_ESSL #include "essl.h" #else -#include "cblas.h" +#include "mkl_cblas.h" #endif #endif diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf index 015a7549c..0de0d3f04 100644 --- a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf +++ b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf @@ -55,10 +55,10 @@ READ A <- A Start( 0 ) BODY [type=CUDA weight=1] { - int i, j, a[100]; + int i, j, a = 1, b = 2, c = 0; for( i = 0; i < NL; i++ ) - for( j = 1; j < 100; j++ ) - a[i] = a[i] + a[i-1]; + for( j = 0; j < 100; j++ ) + c = a + b; printf("HelloWorldCuda on GPU \n"); From 6cac4a9b766213e35e64b9c6996843df98984efc Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Apr 2022 05:20:29 +1000 Subject: [PATCH 027/215] first level migration working --- parsec/mca/device/cuda/device_cuda_migrate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c247593b8..f8b3c9d43 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,4 +1,3 @@ - #include "parsec/mca/device/cuda/device_cuda_migrate.h" extern int parsec_device_cuda_enabled; @@ -255,8 +254,8 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) if(task != NULL) { - PARSEC_LIST_ITEM_SINGLETON(task); - //printf("Dequeue task %s from %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, task), i); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); + //printf("Dequeue task %s from %d\n", parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, task), i); printf("Dequeue Task(%d) from %d\n", task->locals[0].value , i); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ } @@ -307,7 +306,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module if(migrated_gpu_task != NULL) { - PARSEC_LIST_ITEM_SINGLETON(migrated_gpu_task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device From a12bbcab9d155e27018f2dc725422745ac07d391 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Apr 2022 23:52:10 +1000 Subject: [PATCH 028/215] starvation check added --- .../mca/device/cuda/.device_cuda_migrate.c.swp | Bin 0 -> 24576 bytes parsec/mca/device/cuda/device_cuda_migrate.c | 7 ++++--- tests/dsl/ptg/cuda/HelloWorldCuda.jdf | 9 +++++---- 3 files changed, 9 insertions(+), 7 deletions(-) create mode 100644 parsec/mca/device/cuda/.device_cuda_migrate.c.swp diff --git a/parsec/mca/device/cuda/.device_cuda_migrate.c.swp b/parsec/mca/device/cuda/.device_cuda_migrate.c.swp new file mode 100644 index 0000000000000000000000000000000000000000..857fe89bd1b3c601500583586dc141e17085ad14 GIT binary patch literal 24576 
zcmeI4dypK(dB6vo*b&$!1!ZCqs{gt}m4lgITFo_F_ChiBd}al-T` z-cj_fzWRc5&l{B+D_^sIEgWvdvtcy6>D&?dHWF3CkvN$h@!Oe4%dKkSr`5Ug;cG@W zmrd^Fc|U0dRrk@X`sG@%5LSb7wOw<6SLE+XGo0n|V5FMKa`5Ls0s{%GMgr}$K77He zy>m8?>Y=`F?OE!yQ>Rz+h+y|IYva+$o-SKl~vafpIt= zeiqJyQTW%_c;2J%2z&(I2fqjJg%D=pD%b{R!pq^`U+sC%!dGDt-U#d91o)3vdES@d zV~|1wP1pm&uoljOubk|8kHhET0k|JN3m=9L!OidoumCe~1ss5LVFaE#$@6{ykHM$l zUbqR~18;?2fw#a7un45&=1h>NxUSNcxKrCO<kqd4`-Lqqhar?_k_lIq6PZyl~w z<93v8(Ra$_`Gm^V3u|}kLnvx}?IN`|2^Lf%_G^4AsU7Vk38=sRVJknWtD?0fThWqA zszN5?qRCmBoK)3TWzM@){4{Qc)k;0!JWR!})hZ-7XLDgg!3cw|_4uIq`_M?Yn#GdJ zeA(h1^(14Pr#>aq3et+rLsAyxzWH9#a?d>OA=9#!>ni$Xr4FIYmhP@h=TSDYkCZR9 zOT%ir;U}&;`}Tzh^d*p$WP_CS$8qE%X_m-GKlsv1MzR&9URE4=-CS7<`n79z*?up+ zWUX-nPe$hR?CR{m>(@Au&lW1mwJwNijNS5(+Pi(<{;?gEOST^vtL&KGxn0zC&2mpO zw))-b>bp^{bwQ&Q=yKdppFA)zu6CO7J}%v~Ywxs->xrNlCySjC9xXH*m3A67!t4Ar zjH8j+`F2Sy`L(H^V$|z0`qfCQ!q%lvlAo!f&J$VFBKgx)ZC8*^cMIRgLAaJGm`qF| zCAFcqGS>FwGP%T$YK`Dci@U|zRu(o}gX5kydD#bb6XfZA+HO};Qyx|8Ry0#7cRwSf zZaUKnm&6ofdxXxe1@$lrYK7h&7v1G*N3C#{``Uu(R+7PSMuCVqGrC`bQDC}O4XeFD zQk6MCY+~39RJ_29C4*w#Z>6f`H|HDt9VB5~Q?)iT%B(sv7gp!gY$KlW8_LSOsBDf$ z{Fa&tBv}%OnMl-;BuvvF661_oyqF^6i25Z8RSwkQAc;_aCFKRz230zWGUG_s9jzac z@_Lm>&5=4i603R`G-@sCA~iybZ0+S4J&EKeXAy_i^p%X>4W@gIT~V9tpm76pLLc~K zs8Rb`8!1suf-r#PyzpMJugL1v9zX zU-fEEmVwyJmGxAFC^a2Rk~pEJ@;8g{wB0iOa9j-* zRn%tMRqnWk+9k{^L9Do)DH4^Y;2;?`Hgjvbf{M5&r zNZM6A7N*)HxTYN@LCyFPo-{G6>gmYIcZ>Tmr>{dpw(uZhgSL1!&C*^Q%4#i^EV!0t z7>}vXGeP9fGy+*TtqY@Sqg@NsT9d52nv?W%Yd!J*Psf+P6vY3>#njig`2GY)9N@Dc zF#t(ZghQ|gcEcsG6HbJ0;@^KA?u6^10duey#zBF^0ZxNc;hXsU_rWpvAp9D<6J86? 
z;paaAUxj~!hv7l^JUjsR!P{Xc{1luH5)Zf?ZUKn_yag_WOP~ni^M4=y7(NVdhe`PM z8qfO{yc@Q|Hc)UTyaCRDpN47_Xu>3%47cLP55rRk?@!=1xD{@Jn_&-pAHVxy zcmQHJ4L*Z^eGKk_+u(!n0XPH)VJ~cjGr;-be~<6|H*gOuz#+I4cES!Q!FmwC{6;vh z>x0{#eZe0x)bp2{sCP0rf_9*$#Nk|DD>C=SO(qrX8H?E+z8I_MH(Ccb9U58i8zjXU zEp9QBoaC8xyiH#6m{RLoxxDNUU;%=U%iAR3Oi+<8lDLpI=Sym0`<0cc?fdss_D}5_ zADcApGX9`0ZH=8-#&_?Zs_dQ`o2cyHJ-KUqZ0fSfLe4~oY0#{sLq*Bg+UlcbRT0(; zx@Ko>Rg=@><3**=6|U~ACf{aQ#~toYM7e9GrbO(e5Gk78++H(jFq0PaW_GA4!<7J9~|9$ zfh~Gjn{U>ibe-_M-K1E*{$%$6EISHj@^~d)ZZ=+1q#P7=CV!~zs!GscNnG|ovB9MJ-5jp#bqUagu~R;5*)3uJ+v3}0+k+9R!?5meJc zaYRi?m2|E_)M6^6j=mA7Q$CZi&Uc&f0rWGj8RlW?EKSrdoT^k@1yE~dxXKZqMLI321Pj`ULFk(ynVO#-fa^Krq(gxFDd5gXzGc9XifrxksnOa;(Z^*$kE`T`X0n z2H8Q=ifoZ9nx_@3tlyBU=mE-T3mGPHlHx2k!cu+->dcnzk_AoQ4GZX28AELiqaOor-2Z}T0e4Oup^`#URUdAxHC zi7hC3lBrWPd4bTHpTKC%am-Ii=E+>VxYKAe{m25}Ijlx%+0~p~nMq*SXJF-8gjZn0 z)VZHs7uX1NuIdOinhk97i_}z{`i;s?;?j6-#IAG^_Uryk-f~+{!k#C>=@i>-!BW3N z?ini}Q<0S)t4UW^j7y=Dk!^Bzgyjm#-lwQO!Z6beGi2X9)z+yKMb(ZSE%~TT2wGrwqJH;%K7USxw7zS3$c8{p#YL=~oL3 zx{l3@Vc%v0Br8=~rz3&hPg&AzJrS8=^q+dNlrWI>dm4+13CqOT(%4NVSbjG(==sNw z7DX%b(R}SviMNFyTo}Pl8)gu+y6%^Wfa}7onGaK4aWik2S*OM9Ci9h(w3~dxsFmvQ znA!AUcH@m>=9ES2ljR2M@F>oc?&%t|se9jdrcy|C3$okCbFdTY8y{ulb_)ym_q zx7v16a&BUeMbIl#Q&FjmTwR~|GV-^PjV`k*%Y%n zK+nq3WXIm=l5C@if6HQ^6)ps(F1aOK-?1era@Apw8jYR0Z0BVc(SrfO!uC9lH2$^I zD5uHtwC;GLvDT$$2F0%R_v^Ry(QVh7`6Ql^NG38scX~R&6-~G;iro{U+16NOLjp!S zjBND4HVP|AMS{n@q@(O$US=}{55f$rKDZnv;AD6R|NeHk1@^%ZYydeE@CtYo|Ne_`4_psN zLH7I0a5BhV|1~fVr^5sI^Phve;7<4u+yQ?CH7LQe`1U`9$KZG2Z1@CzehTM95zdAU zuok|ApMMml;c|EuAOHQZ2X?`QP=xjHMtBmx{_F5K+y{5SJK^F z#qel$$3$$I4RzUFlg_cn%&pNadjB8y(K{wD;^Y6%S?_TMzy2zCGyEb)-(d z{Wka|*aNa}Ux2mnN#ZZN;aU3ON%$K49XtdN!e`*q@K>Pw%6a*n@TLje5f-gjyx0Ljrh zf|HzJNrI-FtJDEveJo$uy+-$>)qGD%k}F&P-qLbL%2l1s1YKAySiUgxV74d|Vr6l# zq~*#T1Tf;4I@vBD0`1Ay)hsDo^Lo}C&u3Rdit5v1HeKZ8bj6%ctV~W%PolKTRc&~*Cz5R>OnDd_BQMD!f7$MGYQqLo_XvagvFJ&$R?wBO z*$iqSr=8vTjQ)oON*z_O4ax85%P<1XVmAR2QjxfO{Ejuc0cf67# 
zFQLT2vD_waIdR5hk;c`yk&Qo3F_!cjE3OL4wZE}AVzTDI}8t8^iUoGJDd$(|F0(4M2^Rv2GZ7LdVDwl8I6m%mf z%=wKvad4-IGTO~cII65R7gffXl*^KK<3_t_C(4AO;OaSGQ!eMGx30yLv{@VR(%3ki ztKFs*5$gj4&riyo^G3S$`h=&;pmU-2DD_uZMl%%*!i zl*7NtVud4M1tqVBW%hN4FP@k8WTM&5fGL+veahupY(|`vn_m$82qz=YwR%Ynvf9`C z+ET8K#`+Yo73UOX;5YrlyaXVJkW4b(1;Fac0WYnz=Cz(aIlRD05bMr`IE~yhcB=Cl zpFX6$nl|K*9dtWe4J}bKYTG?tCv%~;m&~iV5p5<~so)Eor8~AH*DgJI=O*BKcW>Nw z+>4)rWky(W3brB_IoTB-a$@vJXq_1~q;t?|qfF*_i74k^hh+%0`pm@1Z;17s!F zHBzE1{VrSlib`p=touFfep^-zmpzDOHUGE=v8<}Nf|$PI7N_+Wf{S|ll(E42Rrjpg zr{7s?$w`D+)?N#pR~G4J>D)lxSWt15a}PmH9uW;gRebCTTEKFP@A;FAw!(@t2J=BR ztn&sySMS`JQRQblzq=Ia)5-2|O`@GG*Ui~}DB7cZ;l|D4x45?hz|DwNdH(}q`3xHX literal 0 HcmV?d00001 diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index f8b3c9d43..2863c5aec 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -147,7 +147,8 @@ int parsec_cuda_set_device_task(int device, int task_count) */ int is_starving(int device) { - if( device_info[device].load < 1 && device_info[device].task_count < 1 ) + //if( device_info[device].load < 1 && device_info[device].task_count < 1 ) + if( device_info[device].task_count < 1 ) return 1; else return 0; @@ -170,11 +171,11 @@ int find_starving_device(int dealer_device) // 0 device is the CPU, 1 is recursive for(i = 0; i < NDEVICES; i++) { - if( i == dealer_device || i == 0) + if( i == dealer_device ) continue; //printf("Find_starving_device: Total_Dev %d Dealer_Dev %d starving device %d\n", NDEVICES, dealer_device, i); - //if(is_starving(i)) + if(is_starving(i)) return i; } diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf index 0de0d3f04..b342e2ad3 100644 --- a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf +++ b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf @@ -54,11 +54,12 @@ k = 1 .. 
NT READ A <- A Start( 0 ) BODY [type=CUDA weight=1] -{ - int i, j, a = 1, b = 2, c = 0; +{ + int a[2048]; + int i, j; for( i = 0; i < NL; i++ ) - for( j = 0; j < 100; j++ ) - c = a + b; + for( j = 1; j < 2048; j++ ) + a[i] = a[i] + a[i-1]; printf("HelloWorldCuda on GPU \n"); From 0270b423be4ec8fc7917cc9c22f07c933ba9a369 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 30 Apr 2022 00:23:00 +1000 Subject: [PATCH 029/215] task name printed using parsec_task_snprintf() --- .../mca/device/cuda/.device_cuda_migrate.c.swp | Bin 24576 -> 0 bytes parsec/mca/device/cuda/device_cuda_migrate.c | 14 ++++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) delete mode 100644 parsec/mca/device/cuda/.device_cuda_migrate.c.swp diff --git a/parsec/mca/device/cuda/.device_cuda_migrate.c.swp b/parsec/mca/device/cuda/.device_cuda_migrate.c.swp deleted file mode 100644 index 857fe89bd1b3c601500583586dc141e17085ad14..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24576 zcmeI4dypK(dB6vo*b&$!1!ZCqs{gt}m4lgITFo_F_ChiBd}al-T` z-cj_fzWRc5&l{B+D_^sIEgWvdvtcy6>D&?dHWF3CkvN$h@!Oe4%dKkSr`5Ug;cG@W zmrd^Fc|U0dRrk@X`sG@%5LSb7wOw<6SLE+XGo0n|V5FMKa`5Ls0s{%GMgr}$K77He zy>m8?>Y=`F?OE!yQ>Rz+h+y|IYva+$o-SKl~vafpIt= zeiqJyQTW%_c;2J%2z&(I2fqjJg%D=pD%b{R!pq^`U+sC%!dGDt-U#d91o)3vdES@d zV~|1wP1pm&uoljOubk|8kHhET0k|JN3m=9L!OidoumCe~1ss5LVFaE#$@6{ykHM$l zUbqR~18;?2fw#a7un45&=1h>NxUSNcxKrCO<kqd4`-Lqqhar?_k_lIq6PZyl~w z<93v8(Ra$_`Gm^V3u|}kLnvx}?IN`|2^Lf%_G^4AsU7Vk38=sRVJknWtD?0fThWqA zszN5?qRCmBoK)3TWzM@){4{Qc)k;0!JWR!})hZ-7XLDgg!3cw|_4uIq`_M?Yn#GdJ zeA(h1^(14Pr#>aq3et+rLsAyxzWH9#a?d>OA=9#!>ni$Xr4FIYmhP@h=TSDYkCZR9 zOT%ir;U}&;`}Tzh^d*p$WP_CS$8qE%X_m-GKlsv1MzR&9URE4=-CS7<`n79z*?up+ zWUX-nPe$hR?CR{m>(@Au&lW1mwJwNijNS5(+Pi(<{;?gEOST^vtL&KGxn0zC&2mpO zw))-b>bp^{bwQ&Q=yKdppFA)zu6CO7J}%v~Ywxs->xrNlCySjC9xXH*m3A67!t4Ar zjH8j+`F2Sy`L(H^V$|z0`qfCQ!q%lvlAo!f&J$VFBKgx)ZC8*^cMIRgLAaJGm`qF| zCAFcqGS>FwGP%T$YK`Dci@U|zRu(o}gX5kydD#bb6XfZA+HO};Qyx|8Ry0#7cRwSf 
zZaUKnm&6ofdxXxe1@$lrYK7h&7v1G*N3C#{``Uu(R+7PSMuCVqGrC`bQDC}O4XeFD zQk6MCY+~39RJ_29C4*w#Z>6f`H|HDt9VB5~Q?)iT%B(sv7gp!gY$KlW8_LSOsBDf$ z{Fa&tBv}%OnMl-;BuvvF661_oyqF^6i25Z8RSwkQAc;_aCFKRz230zWGUG_s9jzac z@_Lm>&5=4i603R`G-@sCA~iybZ0+S4J&EKeXAy_i^p%X>4W@gIT~V9tpm76pLLc~K zs8Rb`8!1suf-r#PyzpMJugL1v9zX zU-fEEmVwyJmGxAFC^a2Rk~pEJ@;8g{wB0iOa9j-* zRn%tMRqnWk+9k{^L9Do)DH4^Y;2;?`Hgjvbf{M5&r zNZM6A7N*)HxTYN@LCyFPo-{G6>gmYIcZ>Tmr>{dpw(uZhgSL1!&C*^Q%4#i^EV!0t z7>}vXGeP9fGy+*TtqY@Sqg@NsT9d52nv?W%Yd!J*Psf+P6vY3>#njig`2GY)9N@Dc zF#t(ZghQ|gcEcsG6HbJ0;@^KA?u6^10duey#zBF^0ZxNc;hXsU_rWpvAp9D<6J86? z;paaAUxj~!hv7l^JUjsR!P{Xc{1luH5)Zf?ZUKn_yag_WOP~ni^M4=y7(NVdhe`PM z8qfO{yc@Q|Hc)UTyaCRDpN47_Xu>3%47cLP55rRk?@!=1xD{@Jn_&-pAHVxy zcmQHJ4L*Z^eGKk_+u(!n0XPH)VJ~cjGr;-be~<6|H*gOuz#+I4cES!Q!FmwC{6;vh z>x0{#eZe0x)bp2{sCP0rf_9*$#Nk|DD>C=SO(qrX8H?E+z8I_MH(Ccb9U58i8zjXU zEp9QBoaC8xyiH#6m{RLoxxDNUU;%=U%iAR3Oi+<8lDLpI=Sym0`<0cc?fdss_D}5_ zADcApGX9`0ZH=8-#&_?Zs_dQ`o2cyHJ-KUqZ0fSfLe4~oY0#{sLq*Bg+UlcbRT0(; zx@Ko>Rg=@><3**=6|U~ACf{aQ#~toYM7e9GrbO(e5Gk78++H(jFq0PaW_GA4!<7J9~|9$ zfh~Gjn{U>ibe-_M-K1E*{$%$6EISHj@^~d)ZZ=+1q#P7=CV!~zs!GscNnG|ovB9MJ-5jp#bqUagu~R;5*)3uJ+v3}0+k+9R!?5meJc zaYRi?m2|E_)M6^6j=mA7Q$CZi&Uc&f0rWGj8RlW?EKSrdoT^k@1yE~dxXKZqMLI321Pj`ULFk(ynVO#-fa^Krq(gxFDd5gXzGc9XifrxksnOa;(Z^*$kE`T`X0n z2H8Q=ifoZ9nx_@3tlyBU=mE-T3mGPHlHx2k!cu+->dcnzk_AoQ4GZX28AELiqaOor-2Z}T0e4Oup^`#URUdAxHC zi7hC3lBrWPd4bTHpTKC%am-Ii=E+>VxYKAe{m25}Ijlx%+0~p~nMq*SXJF-8gjZn0 z)VZHs7uX1NuIdOinhk97i_}z{`i;s?;?j6-#IAG^_Uryk-f~+{!k#C>=@i>-!BW3N z?ini}Q<0S)t4UW^j7y=Dk!^Bzgyjm#-lwQO!Z6beGi2X9)z+yKMb(ZSE%~TT2wGrwqJH;%K7USxw7zS3$c8{p#YL=~oL3 zx{l3@Vc%v0Br8=~rz3&hPg&AzJrS8=^q+dNlrWI>dm4+13CqOT(%4NVSbjG(==sNw z7DX%b(R}SviMNFyTo}Pl8)gu+y6%^Wfa}7onGaK4aWik2S*OM9Ci9h(w3~dxsFmvQ znA!AUcH@m>=9ES2ljR2M@F>oc?&%t|se9jdrcy|C3$okCbFdTY8y{ulb_)ym_q zx7v16a&BUeMbIl#Q&FjmTwR~|GV-^PjV`k*%Y%n zK+nq3WXIm=l5C@if6HQ^6)ps(F1aOK-?1era@Apw8jYR0Z0BVc(SrfO!uC9lH2$^I zD5uHtwC;GLvDT$$2F0%R_v^Ry(QVh7`6Ql^NG38scX~R&6-~G;iro{U+16NOLjp!S 
zjBND4HVP|AMS{n@q@(O$US=}{55f$rKDZnv;AD6R|NeHk1@^%ZYydeE@CtYo|Ne_`4_psN zLH7I0a5BhV|1~fVr^5sI^Phve;7<4u+yQ?CH7LQe`1U`9$KZG2Z1@CzehTM95zdAU zuok|ApMMml;c|EuAOHQZ2X?`QP=xjHMtBmx{_F5K+y{5SJK^F z#qel$$3$$I4RzUFlg_cn%&pNadjB8y(K{wD;^Y6%S?_TMzy2zCGyEb)-(d z{Wka|*aNa}Ux2mnN#ZZN;aU3ON%$K49XtdN!e`*q@K>Pw%6a*n@TLje5f-gjyx0Ljrh zf|HzJNrI-FtJDEveJo$uy+-$>)qGD%k}F&P-qLbL%2l1s1YKAySiUgxV74d|Vr6l# zq~*#T1Tf;4I@vBD0`1Ay)hsDo^Lo}C&u3Rdit5v1HeKZ8bj6%ctV~W%PolKTRc&~*Cz5R>OnDd_BQMD!f7$MGYQqLo_XvagvFJ&$R?wBO z*$iqSr=8vTjQ)oON*z_O4ax85%P<1XVmAR2QjxfO{Ejuc0cf67# zFQLT2vD_waIdR5hk;c`yk&Qo3F_!cjE3OL4wZE}AVzTDI}8t8^iUoGJDd$(|F0(4M2^Rv2GZ7LdVDwl8I6m%mf z%=wKvad4-IGTO~cII65R7gffXl*^KK<3_t_C(4AO;OaSGQ!eMGx30yLv{@VR(%3ki ztKFs*5$gj4&riyo^G3S$`h=&;pmU-2DD_uZMl%%*!i zl*7NtVud4M1tqVBW%hN4FP@k8WTM&5fGL+veahupY(|`vn_m$82qz=YwR%Ynvf9`C z+ET8K#`+Yo73UOX;5YrlyaXVJkW4b(1;Fac0WYnz=Cz(aIlRD05bMr`IE~yhcB=Cl zpFX6$nl|K*9dtWe4J}bKYTG?tCv%~;m&~iV5p5<~so)Eor8~AH*DgJI=O*BKcW>Nw z+>4)rWky(W3brB_IoTB-a$@vJXq_1~q;t?|qfF*_i74k^hh+%0`pm@1Z;17s!F zHBzE1{VrSlib`p=touFfep^-zmpzDOHUGE=v8<}Nf|$PI7N_+Wf{S|ll(E42Rrjpg zr{7s?$w`D+)?N#pR~G4J>D)lxSWt15a}PmH9uW;gRebCTTEKFP@A;FAw!(@t2J=BR ztn&sySMS`JQRQblzq=Ia)5-2|O`@GG*Ui~}DB7cZ;l|D4x45?hz|DwNdH(}q`3xHX diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 2863c5aec..be648f084 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -219,8 +219,8 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, char tmp[MAX_TASK_STRLEN]; parsec_list_t* li = migrated_task_list[starving_device_index]; parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); - //printf("Enqueue task %s to %d\n",parsec_task_snprintf(tmp, MAX_TASK_STRLEN, task), starving_device_index); - printf("Enqueue Task(%d) from %d\n", task->locals[0].value); + printf("Enqueue task %s to device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, 
((parsec_gpu_task_t *) task)->ec), starving_device_index); + //iprintf("Enqueue Task(%d) from %d\n", task->locals[0].value); //printf("Enqueue task to %d\n", starving_device_index); return 0; } @@ -256,8 +256,8 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) if(task != NULL) { PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); - //printf("Dequeue task %s from %d\n", parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, task), i); - printf("Dequeue Task(%d) from %d\n", task->locals[0].value , i); + printf("Dequeue task %s from device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); + //printf("Dequeue Task(%d) from %d\n", task->locals[0].value , i); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ } } @@ -291,6 +291,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; int half = 0, nb_migrated = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; + char tmp[128]; //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); @@ -311,8 +312,9 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device - printf("Task(%d) migrated from device %d to device %d: %d \n", ((parsec_task_t *) migrated_gpu_task)->locals[0].value, dealer_device_index, starving_device_index, nb_migrated); - parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + //printf("Task(%d) migrated from device %d to device %d: %d \n", ((parsec_task_t *) migrated_gpu_task)->locals[0].value, 
dealer_device_index, starving_device_index, nb_migrated); + printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); + parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); } //else // break; From 713c36b1c087b5e1147e9fa0a58b764e9f18b0c9 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 30 Apr 2022 00:29:03 +1000 Subject: [PATCH 030/215] code cleanup --- parsec/mca/device/cuda/device_cuda_migrate.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index be648f084..dadb674e7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -220,8 +220,6 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, parsec_list_t* li = migrated_task_list[starving_device_index]; parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); printf("Enqueue task %s to device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), starving_device_index); - //iprintf("Enqueue Task(%d) from %d\n", task->locals[0].value); - //printf("Enqueue task to %d\n", starving_device_index); return 0; } @@ -255,9 +253,8 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) if(task != NULL) { - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); printf("Dequeue task %s from device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); - //printf("Dequeue Task(%d) from %d\n", task->locals[0].value , i); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ } } @@ 
-308,13 +305,12 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module if(migrated_gpu_task != NULL) { - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device - //printf("Task(%d) migrated from device %d to device %d: %d \n", ((parsec_task_t *) migrated_gpu_task)->locals[0].value, dealer_device_index, starving_device_index, nb_migrated); - printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); + parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); } //else // break; From c262067480d154bd6dd9e73256747b39386b4b38 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 30 Apr 2022 01:18:14 +1000 Subject: [PATCH 031/215] task counting cahaged to different levels of execution --- parsec/mca/device/cuda/device_cuda_migrate.c | 42 +++++++++++--------- parsec/mca/device/cuda/device_cuda_migrate.h | 15 +++++-- parsec/mca/device/cuda/device_cuda_module.c | 4 +- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index dadb674e7..4f3004c1d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -16,7 +16,7 @@ static int 
NDEVICES; int parsec_cuda_migrate_init(int ndevices) { - int i; + int i, j; cudaError_t cudastatus; #if defined(PARSEC_HAVE_CUDA) nvmlReturn_t nvml_ret; @@ -28,7 +28,8 @@ int parsec_cuda_migrate_init(int ndevices) for(i = 0; i < NDEVICES; i++) { - device_info[i].task_count = 0; + for(j = 0; j < EXECUTION_LEVEL; j++) + device_info[i].task_count[j] = 0; device_info[i].load = 0; migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } @@ -97,44 +98,47 @@ int parsec_cuda_get_device_load(int device) } + /** - * @brief returns the number of tasks in a particular device + * @brief sets the load of a particular device * * @param device index of the device * @return int */ -int parsec_cuda_get_device_task(int device) +int parsec_cuda_set_device_load(int device, int load) { - return device_info[device].task_count; + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); + return rc+load; } /** - * @brief sets the load of a particular device + * @brief returns the number of tasks in a particular device * * @param device index of the device + * @param level level of execution * @return int */ -int parsec_cuda_set_device_load(int device, int load) +int parsec_cuda_get_device_task(int device, int level) { - int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); - return rc+load; + return device_info[device].task_count[level]; } + /** * @brief sets the number of tasks in a particular device * * @param device index of the device + * @param level level of execution * @return int */ -int parsec_cuda_set_device_task(int device, int task_count) +int parsec_cuda_set_device_task(int device, int task_count, int level) { - int rc = parsec_atomic_fetch_add_int32(&(device_info[device].task_count), task_count); - //printf("Device %d: Prev load %d, Current load = %d \n", device, rc, rc+task_count); - return rc+task_count; + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].task_count[level]), task_count); + return rc + task_count; } /** @@ 
-148,7 +152,7 @@ int parsec_cuda_set_device_task(int device, int task_count) int is_starving(int device) { //if( device_info[device].load < 1 && device_info[device].task_count < 1 ) - if( device_info[device].task_count < 1 ) + if( device_info[device].task_count[/* level */ 0] < 1 ) return 1; else return 0; @@ -292,7 +296,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if(parsec_cuda_get_device_task(dealer_device_index) < 3) // make sure dealer does not starve + if(parsec_cuda_get_device_task(dealer_device_index, /* level */ 0) < 3) // make sure dealer does not starve return 0; starving_device_index = find_starving_device(dealer_device_index); @@ -307,8 +311,8 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; - parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the dealer device - parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device + parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); } @@ -348,8 +352,8 @@ int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t if(migrated_gpu_task != NULL) { - parsec_cuda_set_device_task(dealer_device_index, -1); // decrement task count at the 
dealer device - parsec_cuda_set_device_task(starving_device_index, 1); // increment task count at the starving device + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device + parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); return 1; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 544841b3e..9ba26dbcc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -11,9 +11,18 @@ #include #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) +/** + * @brief + * level 0 - task has been enqueued to the pending queue of the device. It has not been progressed. + * level 1 - task has been dequeued from the pending queue of the device and it has been moved to + * the queue that deals with movement of the task data to the GPU, but has not yet been moved + * level 2 - task data has been moved to the GPU, GPU is in control of the data and Task. 
+ * + */ +#define EXECUTION_LEVEL 3 typedef struct parsec_device_cuda_info_s { - int task_count; + int task_count[EXECUTION_LEVEL]; int load; //parsec_atomic_lock_t lock; } parsec_device_cuda_info_t; @@ -21,9 +30,9 @@ typedef struct parsec_device_cuda_info_s { int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); -int parsec_cuda_get_device_task(int device); int parsec_cuda_set_device_load(int device, int load); -int parsec_cuda_set_device_task(int device, int task_count); +int parsec_cuda_get_device_task(int device, int level); +int parsec_cuda_set_device_task(int device, int task_count, int level); int is_starving(int device); int find_starving_device(int dealer_device); parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index c28d0a5aa..887427227 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2589,7 +2589,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1); // increment task count for this device + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); @@ -2770,7 +2770,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * immediatly befor the execution of the task. * TODO: Should this be moved to when the task is completed? 
*/ - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); remove_gpu_task: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; From d499124d4b397a94ad632dfb1f80f0504d1bd0bb Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 30 Apr 2022 06:36:31 +1000 Subject: [PATCH 032/215] half policy reinstated --- parsec/mca/device/cuda/device_cuda_migrate.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 4f3004c1d..5361b2f4c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -223,7 +223,7 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, char tmp[MAX_TASK_STRLEN]; parsec_list_t* li = migrated_task_list[starving_device_index]; parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); - printf("Enqueue task %s to device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), starving_device_index); + printf("Enqueue task %s to device queue %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), starving_device_index); return 0; } @@ -258,7 +258,7 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) if(task != NULL) { PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); - printf("Dequeue task %s from device %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); + printf("Dequeue task %s from device queue %d and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts 
from 2 */ } } @@ -296,15 +296,17 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if(parsec_cuda_get_device_task(dealer_device_index, /* level */ 0) < 3) // make sure dealer does not starve + //if(parsec_cuda_get_device_task(dealer_device_index, /* level */ 0) < 3) // make sure dealer does not starve + // return 0; + if(is_starving(dealer_device_index)) return 0; starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return 0; - //do - //{ + do + { migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); if(migrated_gpu_task != NULL) { @@ -316,11 +318,11 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); } - //else - // break; + else + break; - //half++; - //}while(half < (dealer_task_count / 2) ); + half++; + }while(half < (dealer_task_count / 2) ); //if(nb_migrated > 0) // printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); From 0584df3771dc6e64551e1ce2e904ec8bcda0761c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 3 May 2022 00:59:31 +1000 Subject: [PATCH 033/215] total tasks computation per device added --- parsec/mca/device/cuda/device_cuda_migrate.c | 19 ++++++++++++++++++- parsec/mca/device/cuda/device_cuda_migrate.h | 7 ++++--- parsec/mca/device/cuda/device_cuda_module.c | 1 + 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c
b/parsec/mca/device/cuda/device_cuda_migrate.c index 5361b2f4c..d8507093d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -31,6 +31,7 @@ int parsec_cuda_migrate_init(int ndevices) for(j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; device_info[i].load = 0; + device_info[i].total_tasks_executed = 0; migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } @@ -47,6 +48,7 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini() { int i; + int total_tasks = 0; #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); @@ -55,6 +57,7 @@ int parsec_cuda_migrate_fini() for(i = 0; i < NDEVICES; i++) { PARSEC_OBJ_RELEASE(migrated_task_list[i]); + printf("Total tasks executed in device %d: %d \n", i, device_info[i].total_tasks_executed); } free(migrated_task_list); free(device_info); @@ -109,7 +112,7 @@ int parsec_cuda_get_device_load(int device) int parsec_cuda_set_device_load(int device, int load) { int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); - return rc+load; + return rc + load; } @@ -141,6 +144,20 @@ int parsec_cuda_set_device_task(int device, int task_count, int level) return rc + task_count; } + +/** + * @brief sets the load of a particular device + * + * @param device index of the device + * @return int + */ + +int parsec_cuda_tasks_executed(int device) +{ + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].total_tasks_executed), 1); + return rc + 1; +} + /** * @brief returns 1 if the device is starving, 0 if its is not * diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 9ba26dbcc..7a25e2df9 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -22,9 +22,9 @@ #define EXECUTION_LEVEL 3 typedef struct parsec_device_cuda_info_s { - int task_count[EXECUTION_LEVEL]; - int load; - //parsec_atomic_lock_t lock; + int 
total_tasks_executed; + int task_count[EXECUTION_LEVEL]; + int load; } parsec_device_cuda_info_t; int parsec_cuda_migrate_init(int ndevices); @@ -33,6 +33,7 @@ int parsec_cuda_get_device_load(int device); int parsec_cuda_set_device_load(int device, int load); int parsec_cuda_get_device_task(int device, int level); int parsec_cuda_set_device_task(int device, int task_count, int level); +int parsec_cuda_tasks_executed(int device); int is_starving(int device); int find_starving_device(int dealer_device); parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 887427227..88c045b12 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2771,6 +2771,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * TODO: Should this be moved to when the task is completed? */ parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); + parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); remove_gpu_task: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; From 6229e797609f2821fb5354e691d1f39047a4fca1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 3 May 2022 03:52:11 +1000 Subject: [PATCH 034/215] DEVICE_NUM() macro added --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++---- parsec/mca/device/cuda/device_cuda_migrate.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d8507093d..a72207cc0 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -17,7 +17,7 @@ static int NDEVICES; int parsec_cuda_migrate_init(int ndevices) { int i, j; - cudaError_t cudastatus; + #if defined(PARSEC_HAVE_CUDA) 
nvmlReturn_t nvml_ret; #endif @@ -189,13 +189,11 @@ int find_starving_device(int dealer_device) { int i; - // 0 device is the CPU, 1 is recursive for(i = 0; i < NDEVICES; i++) { if( i == dealer_device ) continue; - //printf("Find_starving_device: Total_Dev %d Dealer_Dev %d starving device %d\n", NDEVICES, dealer_device, i); if(is_starving(i)) return i; } @@ -276,7 +274,7 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) { PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); printf("Dequeue task %s from device queue %d and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); - parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, i+2); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ + parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, DEVICE_NUM(i)); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ } } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 7a25e2df9..4f3f4f5cf 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -11,6 +11,8 @@ #include #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) +#define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) + /** * @brief * level 0 - task has been enqueued to the pending queue of the device. It has not been progressed. From 8a03db725d8acb943f5aa930e628845bce26d879 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 3 May 2022 06:36:16 +1000 Subject: [PATCH 035/215] task counting corrected task was being counted twice when a task was migrated. Initially in migrate_if_starving() and then again in parsec_cuda_kernel_scheduler(). The counting in migrate_if_starving() was removed to correct this. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index a72207cc0..cefc83414 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -58,6 +58,7 @@ int parsec_cuda_migrate_fini() { PARSEC_OBJ_RELEASE(migrated_task_list[i]); printf("Total tasks executed in device %d: %d \n", i, device_info[i].total_tasks_executed); + printf("Test count %d: %d \n", i, parsec_cuda_get_device_task(i, 0)); } free(migrated_task_list); free(device_info); @@ -329,8 +330,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device - printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); + printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); } else From 0360825256c1ccfa973f2c8570ed3d56323b94fe Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 5 May 2022 01:02:40 +1000 Subject: [PATCH 036/215] migration protocol simplified. A new structure migrated_task_t is used to hold the migrated task as well as the starving and the dealer devices. This is enqueued to a single queue. 
All the compute threads search this queue to discover migrated tasks. --- parsec/mca/device/cuda/device_cuda_migrate.c | 118 ++++++++----------- parsec/mca/device/cuda/device_cuda_migrate.h | 17 ++- 2 files changed, 61 insertions(+), 74 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index cefc83414..b8055db2f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -2,7 +2,7 @@ extern int parsec_device_cuda_enabled; parsec_device_cuda_info_t* device_info; -static parsec_list_t** migrated_task_list; +static parsec_list_t* migrated_task_list; static int NDEVICES; @@ -24,7 +24,7 @@ int parsec_cuda_migrate_init(int ndevices) NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); - migrated_task_list = (parsec_list_t**) calloc(ndevices, sizeof(parsec_list_t*)); + migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t);; for(i = 0; i < NDEVICES; i++) { @@ -32,7 +32,6 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].task_count[j] = 0; device_info[i].load = 0; device_info[i].total_tasks_executed = 0; - migrated_task_list[i] = PARSEC_OBJ_NEW(parsec_list_t); } #if defined(PARSEC_HAVE_CUDA) @@ -56,11 +55,10 @@ int parsec_cuda_migrate_fini() for(i = 0; i < NDEVICES; i++) { - PARSEC_OBJ_RELEASE(migrated_task_list[i]); printf("Total tasks executed in device %d: %d \n", i, device_info[i].total_tasks_executed); - printf("Test count %d: %d \n", i, parsec_cuda_get_device_task(i, 0)); + printf("Test count %d: %d \n", i, parsec_cuda_get_device_task(i, 0)); } - free(migrated_task_list); + PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); printf("Migration module shut down \n"); @@ -203,45 +201,6 @@ int find_starving_device(int dealer_device) } -/** - * @brief selects a new starving device instead of the originally - * intended device. 
This enables migration of a task before it - * is scheduled to any particular device. - * - * @param dealer_device_index the device the task was initially assigned to - * @return parsec_device_gpu_module_t* - * - */ -parsec_device_gpu_module_t* -parsec_cuda_change_device( int dealer_device_index) -{ - int starving_device_index; - parsec_device_gpu_module_t* starving_gpu_device; - - starving_device_index = find_starving_device(dealer_device_index); - - if(starving_device_index == -1) - starving_device_index = dealer_device_index; - starving_gpu_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(starving_device_index); - - printf(" parsec_cuda_change_device: Total_Dev %d Dealer_Dev %d Starving_dev %d \n", - parsec_device_cuda_enabled, dealer_device_index, starving_device_index); - - return starving_gpu_device; -} - - - -int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, - parsec_task_t *task, - int starving_device_index) -{ - char tmp[MAX_TASK_STRLEN]; - parsec_list_t* li = migrated_task_list[starving_device_index]; - parsec_list_chain_sorted(li, (parsec_list_item_t*) task, parsec_execution_context_priority_comparator); - printf("Enqueue task %s to device queue %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), starving_device_index); - return 0; -} /** * @brief This function will be called in __parsec_context_wait() just before @@ -256,26 +215,27 @@ int parsec_cuda_kernel_enqueue( parsec_execution_stream_t *es, * @return int */ -int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) +int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) { int i; char tmp[128]; - parsec_task_t * task = NULL; - parsec_list_t* li = NULL; + migrated_task_t *mig_task = NULL; + parsec_gpu_task_t *migrated_gpu_task; + parsec_device_gpu_module_t* dealer_device; + parsec_device_gpu_module_t* starving_device; - for(i = 0; i < NDEVICES; i++) - { - li = migrated_task_list[i]; - task = (parsec_task_t*) 
parsec_list_pop_front(li); - if(task != NULL) - break; - } + mig_task = (migrated_task_t*) parsec_fifo_try_pop(migrated_task_list); - if(task != NULL) + if(mig_task != NULL) { - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); - printf("Dequeue task %s from device queue %d and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) task)->ec), i); - parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) task, DEVICE_NUM(i)); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ + parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; + parsec_device_gpu_module_t* dealer_device = mig_task->dealer_device; + parsec_device_gpu_module_t* starving_device = mig_task->starving_device; + + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); + printf("Dequeue task %s from device queue %d and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), i); + parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, DEVICE_NUM(i)); /* device 0 is the CPU, device 1 is recursive, cuda device count starts from 2 */ + free(mig_task); } } @@ -286,12 +246,18 @@ int parsec_cuda_kernel_dequeue( parsec_execution_stream_t *es) * Returns: negative number if any error occured. * positive: starving device index. 
*/ -int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, - int starving_device_index, - parsec_gpu_task_t *migrated_gpu_task) +int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task) { - parsec_cuda_kernel_enqueue(es, (parsec_task_t *) migrated_gpu_task, starving_device_index); - return starving_device_index; + parsec_list_chain_sorted(migrated_task_list, (parsec_list_item_t*) mig_task, parsec_execution_context_priority_comparator); + + char tmp[MAX_TASK_STRLEN]; + parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; + parsec_device_gpu_module_t* dealer_device = mig_task->dealer_device; + parsec_device_gpu_module_t* starving_device = mig_task->starving_device; + printf("Enqueue task %s to device queue %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, + ((parsec_gpu_task_t *) migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); + + return 0; } /** @@ -308,18 +274,18 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; int half = 0, nb_migrated = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; + parsec_device_gpu_module_t* starving_device = NULL; + migrated_task_t *mig_task = NULL; char tmp[128]; - //dealer_task_count = parsec_cuda_get_device_task(dealer_device_index); dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - //if(parsec_cuda_get_device_task(dealer_device_index, /* level */ 0) < 3) // make sure dealer does not starve - // return 0; if(is_starving(dealer_device_index)) return 0; starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return 0; + starving_device = (parsec_device_cuda_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); do { @@ -330,8 +296,14 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module 
PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); + + mig_task = (parsec_gpu_task_t *) malloc(sizeof(parsec_gpu_task_t)); + mig_task->gpu_task = migrated_gpu_task; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + + parsec_cuda_mig_task_enqueue(es, mig_task); } else break; @@ -361,6 +333,7 @@ int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t parsec_gpu_task_t* migrated_gpu_task) { int starving_device_index = -1, dealer_device_index = 0; + migrated_task_t *mig_task = NULL; dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); starving_device_index = find_starving_device(dealer_device_index); @@ -371,7 +344,12 @@ int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t { parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device - parsec_cuda_kernel_migrate(es, starving_device_index, migrated_gpu_task); + + mig_task = (parsec_gpu_task_t *) malloc(sizeof(parsec_gpu_task_t)); + mig_task->gpu_task = migrated_gpu_task; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = 
(parsec_device_cuda_module_t*)parsec_mca_device_get(starving_device_index); + parsec_cuda_mig_task_enqueue(es, mig_task); return 1; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 4f3f4f5cf..034d251bb 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -23,12 +23,22 @@ */ #define EXECUTION_LEVEL 3 -typedef struct parsec_device_cuda_info_s { +typedef struct parsec_device_cuda_info_s +{ int total_tasks_executed; int task_count[EXECUTION_LEVEL]; int load; } parsec_device_cuda_info_t; +typedef struct migrated_task_s +{ + parsec_list_item_t list_item; + parsec_gpu_task_t* gpu_task; + parsec_device_gpu_module_t* dealer_device; + parsec_device_gpu_module_t* starving_device; + +} migrated_task_t; + int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); @@ -39,9 +49,8 @@ int parsec_cuda_tasks_executed(int device); int is_starving(int device); int find_starving_device(int dealer_device); parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); -int parsec_cuda_kernel_migrate( parsec_execution_stream_t *es, - int starving_device_index, - parsec_gpu_task_t *migrated_gpu_task); +int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task); +int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es); int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, parsec_gpu_task_t* migrated_gpu_task); int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); From a940042107f11b11279c7fcc7a5ff62298aa564d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 5 May 2022 01:18:05 +1000 Subject: [PATCH 037/215] new api call added to scheduler --- parsec/scheduling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/scheduling.c 
b/parsec/scheduling.c index 3b904febf..d4a904f1f 100644 --- a/parsec/scheduling.c +++ b/parsec/scheduling.c @@ -564,7 +564,7 @@ int __parsec_context_wait( parsec_execution_stream_t* es ) * This will also ensure that a migrated task gets priority in execution * when compared to a new task. */ - parsec_cuda_kernel_dequeue(es); + parsec_cuda_mig_task_dequeue(es); task = parsec_current_scheduler->module.select(es, &distance); From f989330f386754c49bd579a2a01d4322c54521c7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 5 May 2022 02:02:00 +1000 Subject: [PATCH 038/215] GPU task type checked and some code correction. Every time a task is selected for migration the type of the task is checked to make sure that it is a computational task and not some task inserted by PaRSEC for bookkeeping. If it is a bookkeeping task migration is stopped so that PaRSEC can proceed with the bookkeeping. --- parsec/mca/device/cuda/device_cuda_migrate.c | 26 ++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b8055db2f..3d53b1410 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -217,7 +217,6 @@ int find_starving_device(int dealer_device) int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) { - int i; char tmp[128]; migrated_task_t *mig_task = NULL; parsec_gpu_task_t *migrated_gpu_task; @@ -233,10 +232,14 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) parsec_device_gpu_module_t* starving_device = mig_task->starving_device; PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); - printf("Dequeue task %s from device queue %d and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), i); - parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, DEVICE_NUM(i)); /* device 0 is the CPU, device
1 is recursive, cuda device count starts from 2 */ + printf("Dequeue task %s from device queue and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec)); + parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, starving_device->super.device_index); free(mig_task); + + return 1; } + + return 0; } /** @@ -285,20 +288,29 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module starving_device_index = find_starving_device(dealer_device_index); if(starving_device_index == -1) return 0; - starving_device = (parsec_device_cuda_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); + starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); do { migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); if(migrated_gpu_task != NULL) { + /** + * @brief if the GPU task is a not a computational kerenel + * stop migration. 
+ */ + if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL) + { + parsec_list_push_front(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task); + return nb_migrated; + } PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - mig_task = (parsec_gpu_task_t *) malloc(sizeof(parsec_gpu_task_t)); + mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; @@ -345,10 +357,10 @@ int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device - mig_task = (parsec_gpu_task_t *) malloc(sizeof(parsec_gpu_task_t)); + mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; - mig_task->starving_device = (parsec_device_cuda_module_t*)parsec_mca_device_get(starving_device_index); + mig_task->starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(starving_device_index); parsec_cuda_mig_task_enqueue(es, mig_task); return 1; } From bfbeed2153ee73206ba014064099123af6de70d9 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 5 May 2022 02:27:50 +1000 Subject: [PATCH 039/215] parsec_cuda_mig_task_enqueue() updated to use parsec_list_push_back() instead of parsec_list_chain_sorted(). 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3d53b1410..44e16899d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -251,7 +251,8 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) */ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task) { - parsec_list_chain_sorted(migrated_task_list, (parsec_list_item_t*) mig_task, parsec_execution_context_priority_comparator); + //parsec_list_chain_sorted(migrated_task_list, (parsec_list_item_t*) mig_task, parsec_execution_context_priority_comparator); + parsec_list_push_back(migrated_task_list, mig_task); char tmp[MAX_TASK_STRLEN]; parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; @@ -323,9 +324,6 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module half++; }while(half < (dealer_task_count / 2) ); - //if(nb_migrated > 0) - // printf("Tasks migrated from device %d to device %d: %d \n", dealer_device_index, starving_device_index, nb_migrated); - return nb_migrated; } From 62520898ea2917577eaed533fc127cf352b20530 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 8 May 2022 14:19:12 +1000 Subject: [PATCH 040/215] First level migration added. We use the existing stage_in functionality of a task to do device-to-device data transfer. The readers, coherency_state and data_transfer_status, of the data to be migrated, are manipulated to perform the D2D transfer. In parsec_gpu_data_stage_in(), conditions are added to adapt it for D2D transfer for task migration.
--- parsec/mca/device/cuda/device_cuda_migrate.c | 242 ++++++++++++++++--- parsec/mca/device/cuda/device_cuda_migrate.h | 10 + parsec/mca/device/cuda/device_cuda_module.c | 86 ++++++- 3 files changed, 300 insertions(+), 38 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 44e16899d..2a05a97e3 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -252,7 +252,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task) { //parsec_list_chain_sorted(migrated_task_list, (parsec_list_item_t*) mig_task, parsec_execution_context_priority_comparator); - parsec_list_push_back(migrated_task_list, mig_task); + parsec_list_push_back((parsec_list_t*)migrated_task_list, (parsec_list_item_t*)mig_task); char tmp[MAX_TASK_STRLEN]; parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; @@ -311,6 +311,9 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); + //increment_readers(migrated_gpu_task, dealer_device); + //migrate_data_d2d(migrated_gpu_task, dealer_device, starving_device); + test_task_permission(migrated_gpu_task, dealer_device); mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; @@ -327,45 +330,224 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return nb_migrated; } +int test_task_permission(parsec_gpu_task_t *gpu_task, 
parsec_device_gpu_module_t* dealer_device) +{ + int i = 0; + parsec_task_t *task = gpu_task->ec; + char tmp[128]; + for(i = 0; i < task->task_class->nb_flows; i++) + { + if (task->data[i].data_out == NULL) + continue; -/** - * @brief Tasks is migrated immediatly. - * Mainly used for validating migration protocol. - * - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int - */ + if(task->data[i].data_out->original->owner_device == dealer_device->super.device_index) + { + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + if(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) + { + printf("%s: has WRITE permission on copy %p [reader = %d], in GPU[%s] with Coherency %d \n", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + task->data[i].data_out, task->data[i].data_out->readers, + dealer_device->super.name, task->data[i].data_out->coherency_state); + printf("NEW permissions %d OLD permissions %d\n", gpu_task->flow[i]->flow_flags, + gpu_task->flow[i]->flow_flags ^ PARSEC_FLOW_ACCESS_WRITE); + } -int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, - parsec_gpu_task_t* migrated_gpu_task) + if(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) + printf("%s: has READ permission on copy %p [reader = %d], in GPU[%s] with Coherency %d \n", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + task->data[i].data_out, task->data[i].data_out->readers, + dealer_device->super.name, task->data[i].data_out->coherency_state); + + //parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( task->data[i].data_out->device_index ); + //printf("%s: %s (%s) can directly do D2D from devices with masks %d \n", + // parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + // 
dealer_device->super.name,(&in_elem_dev->super)->super.name, + // dealer_device->peer_access_mask); + + printf("%s: possible D2D from cuda device %d for flow %d (from copy %p) \n", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + ((parsec_device_cuda_module_t* )dealer_device)->cuda_index, + gpu_task->flow[i]->flow_index, task->data[i].data_out); + + task->data[i].data_in = task->data[i].data_out; + if(task->data[i].data_in->device_index != task->data[i].data_in->original->owner_device) + { + printf("There is something wrong!! device index is %d instead of %d \n", + task->data[i].data_in->device_index, + task->data[i].data_in->original->owner_device); + + task->data[i].data_in->device_index = dealer_device->super.device_index; + } + parsec_data_t* original = task->data[i].data_in->original; + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_atomic_lock( &original->lock ); + if(task->data[i].data_in->readers < 0) + { + printf("There is something wrong!! 
reader is negative \n"); + task->data[i].data_in->readers = 0; + } + parsec_atomic_fetch_inc_int32(&task->data[i].data_in->readers); + task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + task->data[i].data_in->data_transfer_status = PARSEC_DATA_STATUS_SHOULD_MIGRATE; + parsec_atomic_unlock( &original->lock ); + } + } + + return 0; +} + + + + +int increment_readers(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device) { - int starving_device_index = -1, dealer_device_index = 0; - migrated_task_t *mig_task = NULL; + int i = 0; + parsec_task_t *task = gpu_task->ec; + char tmp[128]; - dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - starving_device_index = find_starving_device(dealer_device_index); - if(starving_device_index == -1) - return -1; + for(i = 0; i < task->task_class->nb_flows; i++) + { + if (task->data[i].data_out == NULL) + continue; + /** + * @brief if the owner of the data is not the dealer device, so data_out + * has never been populated with the latest data. + * So, dont bother with any operation. 
+ */ + if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) + { + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + task->data[i].data_out = NULL; + continue; + } + + task->data[i].data_out->readers++; + PARSEC_OBJ_RETAIN(task->data[i].data_out); + printf("%s: Reader incremented for copy %p [reader = %d], in GPU[%s] \n", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + task->data[i].data_out, task->data[i].data_out->readers, + dealer_device->super.name); + } + + return 0; +} - if(migrated_gpu_task != NULL) +int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, + parsec_device_gpu_module_t* dst_dev) +{ + int i = 0, rc = 0, count = 0; + parsec_task_t *task = gpu_task->ec; + const parsec_flow_t *flow; + parsec_data_copy_t * src_data; + parsec_data_copy_t * dst_data; + parsec_data_t* original; // = task->data[flow->flow_index].data_out->original;; + for(i = 0; i < task->task_class->nb_flows; i++) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - parsec_cuda_set_device_task(starving_device_index, /* count */ 1, /* level */ 0); // increment task count at the starving device - - mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); - mig_task->gpu_task = migrated_gpu_task; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(starving_device_index); - parsec_cuda_mig_task_enqueue(es, mig_task); - return 1; + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & flow->flow_flags)) //CTL flow + continue; + + /** + * @brief if the owner of the data is not the dealer device, the NULL is + * set in increment_readers(), so dont bother with any operation. 
+ */ + if (task->data[i].data_out == NULL) + continue; + + original = task->data[flow->flow_index].data_in->original; + flow = gpu_task->flow[i]; + + count = parsec_gpu_data_reserve_device_space_for_flow(dst_dev, gpu_task, flow); + if(rc == PARSEC_HOOK_RETURN_DONE) + { + //parsec_atomic_lock(&original->lock); + //src_data = task->data[i].data_in; + src_data = original->device_copies[src_dev->super.device_index]; + dst_data = task->data[i].data_out; + dst_data->data_transfer_status = PARSEC_DATA_STATUS_UNDER_TRANSFER; + //count = (src_data->original->nb_elts <= dst_data->original->nb_elts) ? + // src_data->original->nb_elts : dst_data->original->nb_elts; + + printf("Moving data from GPU[%s] copy %p at real address %p to GPU[%s] copy %p at real address %p (original %p) \n", + src_dev->super.name, src_data, src_data->device_private, + dst_dev->super.name, dst_data, dst_data->device_private, + original); + + rc = (cudaError_t)cudaMemcpyAsync( dst_data->device_private, + src_data->device_private, + count, + cudaMemcpyDeviceToDevice, + dst_dev->exec_stream[0] ); + PARSEC_CUDA_CHECK_ERROR( "cudaMemcpyAsync ", rc, { return PARSEC_ERROR; } ); + //parsec_atomic_unlock(&original->lock); + dst_data->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; + src_data->readers--; + PARSEC_OBJ_RELEASE(src_data); + parsec_list_push_back(&src_dev->gpu_mem_lru, (parsec_list_item_t*)src_data); + parsec_list_push_back(&dst_dev->gpu_mem_lru, (parsec_list_item_t*)dst_data); + + } } - return 0; +} + +/** + * return 0: reserving space successfull + * return -1: reserving space failed + */ - +int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, + parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow) +{ + parsec_task_t *this_task = gpu_task->ec; + parsec_gpu_data_copy_t* gpu_elem; + parsec_data_t* original; + int count = 0; + + original = this_task->data[flow->flow_index].data_in->original; + count = 
this_task->data[flow->flow_index].data_out->original->nb_elts; + parsec_atomic_lock(&original->lock); + //gpu_elem = PARSEC_DATA_GET_COPY(original, gpu_device->super.device_index); + //this_task->data[flow->flow_index].data_out = gpu_elem; + //if(gpu_elem != NULL) + // printf(" The data is already present. Why is this hapening? \n"); + + gpu_elem = PARSEC_OBJ_NEW(parsec_data_copy_t); + gpu_elem->flags = PARSEC_DATA_FLAG_PARSEC_OWNED | PARSEC_DATA_FLAG_PARSEC_MANAGED; + gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[flow->flow_index]); + if( NULL == gpu_elem->device_private ) + { + printf("ERROR: No memory in starving node. This should never happen \n"); + exit(0); + //return -1; + } + printf("GPU[%s] Succeeded Allocating CUDA copy %p at real address %p [ref_count %d] for data %p \n", + gpu_device->super.name, gpu_elem, gpu_elem->device_private, + gpu_elem->super.super.obj_reference_count, original); + + assert( 0 == gpu_elem->readers ); + gpu_elem->coherency_state = PARSEC_DATA_COHERENCY_INVALID; + gpu_elem->version = this_task->data[flow->flow_index].data_out->version; + parsec_data_copy_attach(original, gpu_elem, gpu_device->super.device_index); + gpu_elem->push_task = gpu_task->ec; + /* set the new datacopy type to the correct one */ + gpu_elem->dtt = this_task->data[flow->flow_index].data_out->dtt; + original->device_copies[gpu_device->super.device_index] = gpu_elem; + + //keep a copy in data_in so that we can use ot for cudaMemcpyAsync + //this_task->data[flow->flow_index].data_in = this_task->data[flow->flow_index].data_out; + this_task->data[flow->flow_index].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + //attach the memory in the gpu device to data_out + this_task->data[flow->flow_index].data_out = gpu_elem; + this_task->data[flow->flow_index].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; + parsec_atomic_unlock(&original->lock); + + return count; } + diff --git 
a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 034d251bb..4e772c94d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -4,6 +4,7 @@ #include "parsec/parsec_config.h" #include "parsec/parsec_internal.h" +#include "parsec/utils/zone_malloc.h" #include "parsec/mca/device/cuda/device_cuda_internal.h" #include "parsec/scheduling.h" #include @@ -13,6 +14,10 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) +#define PARSEC_DATA_STATUS_SHOULD_MIGRATE ((parsec_data_coherency_t)0x3) +#define PARSEC_DATA_STATUS_UNDER_MIGRATION ((parsec_data_coherency_t)0x4) +#define PARSEC_DATA_STATUS_MIGRATION_COMPLETE ((parsec_data_coherency_t)0x5) + /** * @brief * level 0 - task has been enqueued to the pending queue of the device. It has not been progressed. @@ -54,6 +59,11 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es); int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, parsec_gpu_task_t* migrated_gpu_task); int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); +int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, + parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); +int increment_readers(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device); +int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, + parsec_device_gpu_module_t* dest_dev); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 88c045b12..ebe231b62 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -29,6 +29,7 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" + static int parsec_cuda_data_advise(parsec_device_module_t *dev, 
parsec_data_t *data, int advice); /** * According to @@ -1191,7 +1192,18 @@ parsec_default_cuda_stage_in(parsec_gpu_task_t *gtask, size_t count; parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t *)gpu_stream; int i; + char tmp[128]; + + for(i = 0; i < task->task_class->nb_flows; i++){ + if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("%s: Stage_in called for D2D copy from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), + ((parsec_device_cuda_module_t* )parsec_mca_device_get( task->data[i].data_in->device_index))->cuda_index, + task->data[i].data_in, + ((parsec_device_cuda_module_t* )parsec_mca_device_get(task->data[i].data_out->device_index))->cuda_index, + task->data[i].data_out); + if(flow_mask & (1U << i)){ copy_in = task->data[i].data_in; copy_out = task->data[i].data_out; @@ -1205,7 +1217,25 @@ parsec_default_cuda_stage_in(parsec_gpu_task_t *gtask, cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, cuda_stream->cuda_stream ); PARSEC_CUDA_CHECK_ERROR( "cudaMemcpyAsync ", ret, { return PARSEC_ERROR; } ); + + //printf("copy_in Transfer status = %d \n", copy_in->data_transfer_status); + + //if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("%s: D2D copy successfull from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), + ((parsec_device_cuda_module_t* )in_elem_dev)->cuda_index, + copy_in, + ((parsec_device_cuda_module_t* )parsec_mca_device_get(copy_out->device_index))->cuda_index, + copy_out); } + + if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("%s: D2D copy successfull from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), + ((parsec_device_cuda_module_t* 
)in_elem_dev)->cuda_index, + copy_in, + ((parsec_device_cuda_module_t* )parsec_mca_device_get(copy_out->device_index))->cuda_index, + copy_out); } return PARSEC_SUCCESS; } @@ -1275,6 +1305,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, uint32_t nb_elts = gpu_task->flow_nb_elts[flow->flow_index]; int transfer_from = -1; int undo_readers_inc_if_no_transfer = 0; + char tmp[128]; if( gpu_task->task_type == PARSEC_GPU_TASK_TYPE_PREFETCH ) { PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream, @@ -1310,7 +1341,22 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* Detect if we can do a device to device copy. * Current limitations: only for read-only data used read-only on the hosting GPU. */ parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); - if( (PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type) ) { + if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) + || /* check if this data belongs to a migrating task*/ in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) + { + if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + { + //printf("%s: D2D from device %s to device %s for flow %d (from copy %p) \n", + // parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + // gpu_device->super.name, (&in_elem_dev->super)->super.name, flow->flow_index, + // in_elem); + + printf("%s: D2D from cuda device %d to cuda device %d for flow %d (from copy %p) \n", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), + in_elem_dev->cuda_index, ((parsec_device_cuda_module_t* )gpu_device)->cuda_index, + flow->flow_index, in_elem); + } + int potential_alt_src = 0; if( PARSEC_DEV_CUDA == in_elem_dev->super.super.type ) { if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { @@ -1391,6 +1437,12 @@ parsec_gpu_data_stage_in( 
parsec_device_cuda_module_t* cuda_device, __FILE__, __LINE__); } + if(transfer_from == -1 && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + { + printf("Somethinng is wrong!! Transfer is already complete \n"); + transfer_from = 0; + } + /* If data is from NEW, as we skip NULL */ if( NULL == task_data->source_repo_entry ) transfer_from = -1; @@ -1423,7 +1475,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, in_elem_dev->super.super.device_index, in_elem->version, (void*)in_elem->device_private, in_elem, in_elem->super.super.obj_reference_count, gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); - assert((gpu_elem->version < in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER)); + assert((gpu_elem->version < in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); #if defined(PARSEC_PROF_TRACE) if( gpu_stream->prof_event_track_enable ) { @@ -1470,6 +1523,12 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } #endif /* Push data into the GPU from the source device */ + + if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("At the doorstep (copy %p) of D2D copy !! \n", in_elem); + if(gpu_task->stage_in == NULL && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) + printf("Something is wrong!! Stage_in is NULL"); + if(PARSEC_SUCCESS != (gpu_task->stage_in ? 
gpu_task->stage_in(gpu_task, (1U << flow->flow_index), gpu_stream): PARSEC_SUCCESS)) { parsec_warning( "%s:%d %s", __FILE__, __LINE__, "gpu_task->stage_in"); @@ -1477,11 +1536,20 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_warning("<<%p>> -> <<%p on CUDA device %d>> [%d, H2D]", in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, nb_elts); + if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("<<%p>> -> <<%p on CUDA device %d>> [%d, H2D]", + in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, + nb_elts); } else { parsec_warning("<<%p on CUDA device %d>> -> <<%p on CUDA device %d>> [%d, D2D]", in_elem->device_private, in_elem_dev->cuda_index, gpu_elem->device_private, cuda_device->cuda_index, nb_elts); + if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + printf("<<%p on CUDA device %d>> -> <<%p on CUDA device %d>> [%d, D2D]", + in_elem->device_private, in_elem_dev->cuda_index, + gpu_elem->device_private, cuda_device->cuda_index, + nb_elts); } parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) @@ -1498,7 +1566,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.nb_data_faults += nb_elts; /* update the data version in GPU immediately, and mark the data under transfer */ - assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER)); + assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", @@ -1518,6 +1587,12 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, 
parsec_atomic_fetch_dec_int32( &in_elem->readers ); assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); + if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + { + parsec_atomic_fetch_dec_int32( &in_elem->readers ); + in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; + } + parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, @@ -2765,11 +2840,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, __parsec_complete_execution( es, gpu_task->ec ); gpu_device->super.executed_tasks++; - /** - * @brief decrement the task count for the device. The decrement is done - * immediatly befor the execution of the task. - * TODO: Should this be moved to when the task is completed? - */ parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); From d260c106cfa2c0ac36bcb8a1109d5eaf93ecb966 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 8 May 2022 14:40:38 +1000 Subject: [PATCH 041/215] test_task_permission() renamed as change_task_features() --- parsec/mca/device/cuda/device_cuda_migrate.c | 158 +------------------ 1 file changed, 2 insertions(+), 156 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 2a05a97e3..9afc633ec 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -311,9 +311,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) 
migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - //increment_readers(migrated_gpu_task, dealer_device); - //migrate_data_d2d(migrated_gpu_task, dealer_device, starving_device); - test_task_permission(migrated_gpu_task, dealer_device); + change_task_features(migrated_gpu_task, dealer_device); mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; @@ -330,7 +328,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return nb_migrated; } -int test_task_permission(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device) +int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device) { int i = 0; parsec_task_t *task = gpu_task->ec; @@ -399,155 +397,3 @@ int test_task_permission(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t return 0; } - - - - -int increment_readers(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device) -{ - int i = 0; - parsec_task_t *task = gpu_task->ec; - char tmp[128]; - - for(i = 0; i < task->task_class->nb_flows; i++) - { - if (task->data[i].data_out == NULL) - continue; - /** - * @brief if the owner of the data is not the dealer device, so data_out - * has never been populated with the latest data. - * So, dont bother with any operation. 
- */ - if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) - { - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow - continue; - task->data[i].data_out = NULL; - continue; - } - - task->data[i].data_out->readers++; - PARSEC_OBJ_RETAIN(task->data[i].data_out); - printf("%s: Reader incremented for copy %p [reader = %d], in GPU[%s] \n", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - task->data[i].data_out, task->data[i].data_out->readers, - dealer_device->super.name); - } - - return 0; -} - -int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, - parsec_device_gpu_module_t* dst_dev) -{ - int i = 0, rc = 0, count = 0; - parsec_task_t *task = gpu_task->ec; - const parsec_flow_t *flow; - parsec_data_copy_t * src_data; - parsec_data_copy_t * dst_data; - parsec_data_t* original; // = task->data[flow->flow_index].data_out->original;; - for(i = 0; i < task->task_class->nb_flows; i++) - { - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & flow->flow_flags)) //CTL flow - continue; - - /** - * @brief if the owner of the data is not the dealer device, the NULL is - * set in increment_readers(), so dont bother with any operation. - */ - if (task->data[i].data_out == NULL) - continue; - - original = task->data[flow->flow_index].data_in->original; - flow = gpu_task->flow[i]; - - count = parsec_gpu_data_reserve_device_space_for_flow(dst_dev, gpu_task, flow); - if(rc == PARSEC_HOOK_RETURN_DONE) - { - //parsec_atomic_lock(&original->lock); - //src_data = task->data[i].data_in; - src_data = original->device_copies[src_dev->super.device_index]; - dst_data = task->data[i].data_out; - dst_data->data_transfer_status = PARSEC_DATA_STATUS_UNDER_TRANSFER; - //count = (src_data->original->nb_elts <= dst_data->original->nb_elts) ? 
- // src_data->original->nb_elts : dst_data->original->nb_elts; - - printf("Moving data from GPU[%s] copy %p at real address %p to GPU[%s] copy %p at real address %p (original %p) \n", - src_dev->super.name, src_data, src_data->device_private, - dst_dev->super.name, dst_data, dst_data->device_private, - original); - - rc = (cudaError_t)cudaMemcpyAsync( dst_data->device_private, - src_data->device_private, - count, - cudaMemcpyDeviceToDevice, - dst_dev->exec_stream[0] ); - PARSEC_CUDA_CHECK_ERROR( "cudaMemcpyAsync ", rc, { return PARSEC_ERROR; } ); - //parsec_atomic_unlock(&original->lock); - dst_data->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - src_data->readers--; - PARSEC_OBJ_RELEASE(src_data); - parsec_list_push_back(&src_dev->gpu_mem_lru, (parsec_list_item_t*)src_data); - parsec_list_push_back(&dst_dev->gpu_mem_lru, (parsec_list_item_t*)dst_data); - - } - } - return 0; -} - -/** - * return 0: reserving space successfull - * return -1: reserving space failed - */ - -int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, - parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow) -{ - parsec_task_t *this_task = gpu_task->ec; - parsec_gpu_data_copy_t* gpu_elem; - parsec_data_t* original; - int count = 0; - - original = this_task->data[flow->flow_index].data_in->original; - count = this_task->data[flow->flow_index].data_out->original->nb_elts; - parsec_atomic_lock(&original->lock); - //gpu_elem = PARSEC_DATA_GET_COPY(original, gpu_device->super.device_index); - //this_task->data[flow->flow_index].data_out = gpu_elem; - //if(gpu_elem != NULL) - // printf(" The data is already present. Why is this hapening? 
\n"); - - gpu_elem = PARSEC_OBJ_NEW(parsec_data_copy_t); - gpu_elem->flags = PARSEC_DATA_FLAG_PARSEC_OWNED | PARSEC_DATA_FLAG_PARSEC_MANAGED; - gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[flow->flow_index]); - if( NULL == gpu_elem->device_private ) - { - printf("ERROR: No memory in starving node. This should never happen \n"); - exit(0); - //return -1; - } - printf("GPU[%s] Succeeded Allocating CUDA copy %p at real address %p [ref_count %d] for data %p \n", - gpu_device->super.name, gpu_elem, gpu_elem->device_private, - gpu_elem->super.super.obj_reference_count, original); - - assert( 0 == gpu_elem->readers ); - gpu_elem->coherency_state = PARSEC_DATA_COHERENCY_INVALID; - gpu_elem->version = this_task->data[flow->flow_index].data_out->version; - parsec_data_copy_attach(original, gpu_elem, gpu_device->super.device_index); - gpu_elem->push_task = gpu_task->ec; - /* set the new datacopy type to the correct one */ - gpu_elem->dtt = this_task->data[flow->flow_index].data_out->dtt; - original->device_copies[gpu_device->super.device_index] = gpu_elem; - - //keep a copy in data_in so that we can use ot for cudaMemcpyAsync - //this_task->data[flow->flow_index].data_in = this_task->data[flow->flow_index].data_out; - this_task->data[flow->flow_index].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - //attach the memory in the gpu device to data_out - this_task->data[flow->flow_index].data_out = gpu_elem; - this_task->data[flow->flow_index].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; - parsec_atomic_unlock(&original->lock); - - return count; -} - - - From 5b9caf3ee6df3bd0094c4cddf7f21534e39f1f9f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 10 May 2022 03:24:22 +1000 Subject: [PATCH 042/215] change_task_features() moved from migrate_if_starving() to parsec_cuda_mig_task_dequeue() --- parsec/mca/device/cuda/device_cuda_migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9afc633ec..6d7dc5ab7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -230,6 +230,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; parsec_device_gpu_module_t* dealer_device = mig_task->dealer_device; parsec_device_gpu_module_t* starving_device = mig_task->starving_device; + change_task_features(migrated_gpu_task, dealer_device); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); printf("Dequeue task %s from device queue and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec)); @@ -311,7 +312,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - change_task_features(migrated_gpu_task, dealer_device); + //change_task_features(migrated_gpu_task, dealer_device); mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; From 99050425b1454e90a2f41a3f3eea0892691fb5f7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 12 May 2022 02:56:42 +1000 Subject: [PATCH 043/215] accounting information included --- parsec/mca/device/cuda/device_cuda_migrate.c | 17 +++++++++++++---- parsec/mca/device/cuda/device_cuda_migrate.h | 8 ++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 6d7dc5ab7..5e0cb42a4 
100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,6 +4,7 @@ extern int parsec_device_cuda_enabled; parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; +migration_accounting_t* accounting; /** @@ -24,6 +25,7 @@ int parsec_cuda_migrate_init(int ndevices) NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); + accounting = (migration_accounting_t *) calloc(ndevices, sizeof(migration_accounting_t)); migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t);; for(i = 0; i < NDEVICES; i++) @@ -31,7 +33,11 @@ int parsec_cuda_migrate_init(int ndevices) for(j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; device_info[i].load = 0; - device_info[i].total_tasks_executed = 0; + + accounting[i].level0 = 0; + accounting[i].level1 = 0; + accounting[i].level2 = 0; + accounting[i].total_tasks_executed = 0; } #if defined(PARSEC_HAVE_CUDA) @@ -55,8 +61,11 @@ int parsec_cuda_migrate_fini() for(i = 0; i < NDEVICES; i++) { - printf("Total tasks executed in device %d: %d \n", i, device_info[i].total_tasks_executed); - printf("Test count %d: %d \n", i, parsec_cuda_get_device_task(i, 0)); + printf("*********** DEVICE %d *********** \n", i); + printf("Total tasks executed: %d \n", accounting[i].total_tasks_executed); + printf("Tasks migrated: level0 %d, level1 %d, level2 %d (Total %d)\n", + accounting[i].level0, accounting[i].level1, accounting[i].level2, + accounting[i].level0 + accounting[i].level1 + accounting[i].level2); } PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -153,7 +162,7 @@ int parsec_cuda_set_device_task(int device, int task_count, int level) int parsec_cuda_tasks_executed(int device) { - int rc = parsec_atomic_fetch_add_int32(&(device_info[device].total_tasks_executed), 1); + int rc = parsec_atomic_fetch_add_int32(&(accounting[device].total_tasks_executed), 1); return rc 
+ 1; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 4e772c94d..4a65f1ac4 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -44,6 +44,14 @@ typedef struct migrated_task_s } migrated_task_t; +typedef struct migration_accounting_s +{ + int total_tasks_executed; + int level0; + int level1; + int level2; +} migration_accounting_t; + int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); From e750a3dba0aad1af35f30fa75ee987107be926c4 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 20 May 2022 02:12:31 +1000 Subject: [PATCH 044/215] Level 2 migration implemented. As all the data is in the source GPU after the first stage_in, the data in the source GPU will always be the candidate for the second stage_in. The change_task_features() ensures that the data in the source GPU is added to the gpu_mem_lru of the source device. The readers are incremented for flows with WRITE permission, to make sure there is no eviction till the data transfer is complete from source GPU to destination GPU. We don't increment the reader for READ permission as it would have been incremented during the first stage_in to the source GPU. We also move the version increment for a data from stage_in to just before task execution using gpu_data_version_increment(). This will ensure that the version of data remains the same until it is sure to be acted on by the GPU. Right now there is a memory leak and we have disabled two asserts in parsec_cuda_flush_lru() and parsec_cuda_kernel_epilog().
The memory leak needs to be corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 297 +++++++++++++------ parsec/mca/device/cuda/device_cuda_migrate.h | 6 +- parsec/mca/device/cuda/device_cuda_module.c | 150 ++++------ tests/dsl/ptg/cuda/CMakeLists.txt | 4 +- tests/dsl/ptg/cuda/HelloWorldCuda.jdf | 126 +++----- tests/dsl/ptg/cuda/HelloWorldCuda_kernel.cu | 18 ++ 6 files changed, 324 insertions(+), 277 deletions(-) create mode 100644 tests/dsl/ptg/cuda/HelloWorldCuda_kernel.cu diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 5e0cb42a4..e5f16a0ad 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -5,7 +5,7 @@ parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; migration_accounting_t* accounting; - +PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); /** * @brief The function initialises the data structures required @@ -33,19 +33,26 @@ int parsec_cuda_migrate_init(int ndevices) for(j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; device_info[i].load = 0; - + accounting[i].level0 = 0; accounting[i].level1 = 0; accounting[i].level2 = 0; accounting[i].total_tasks_executed = 0; + accounting[i].received = 0; } #if defined(PARSEC_HAVE_CUDA) nvml_ret = nvmlInit_v2(); #endif + char hostname[256]; + gethostname(hostname, sizeof(hostname)); + printf("PID %d on %s ready for attach\n", getpid(), hostname); + //sleep(60); + printf("Migration module initialised for %d devices \n", NDEVICES); + return 0; } @@ -53,7 +60,6 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini() { int i; - int total_tasks = 0; #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); @@ -63,9 +69,10 @@ int parsec_cuda_migrate_fini() { printf("*********** DEVICE %d *********** \n", i); printf("Total tasks executed: %d \n", accounting[i].total_tasks_executed); - printf("Tasks 
migrated: level0 %d, level1 %d, level2 %d (Total %d)\n", + printf("Tasks migrated: level0 %d, level1 %d, level2 %d (Total %d)\n", accounting[i].level0, accounting[i].level1, accounting[i].level2, accounting[i].level0 + accounting[i].level1 + accounting[i].level2); + printf("Task received %d \n", accounting[i].received); } PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -89,8 +96,6 @@ int parsec_cuda_migrate_fini() int parsec_cuda_get_device_load(int device) { - unsigned int nvml_dev; - #if defined(PARSEC_HAVE_CUDA) nvmlDevice_t nvml_device; nvmlUtilization_t nvml_utilization; @@ -100,7 +105,7 @@ int parsec_cuda_get_device_load(int device) nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); device_info[device].load = nvml_utilization.gpu; - printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); + //printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); #else device_info[device].load = device_info[device].task_count; #endif /* PARSEC_HAVE_CUDA */ @@ -228,22 +233,28 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) { char tmp[128]; migrated_task_t *mig_task = NULL; - parsec_gpu_task_t *migrated_gpu_task; - parsec_device_gpu_module_t* dealer_device; - parsec_device_gpu_module_t* starving_device; + parsec_gpu_task_t *migrated_gpu_task = NULL; + parsec_device_gpu_module_t* dealer_device = NULL; + parsec_device_gpu_module_t* starving_device = NULL; + int stage_in_status = 0; + - mig_task = (migrated_task_t*) parsec_fifo_try_pop(migrated_task_list); + mig_task = (migrated_task_t*) parsec_list_pop_front(migrated_task_list); if(mig_task != NULL) { - parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; - parsec_device_gpu_module_t* dealer_device = mig_task->dealer_device; - parsec_device_gpu_module_t* starving_device = mig_task->starving_device; - change_task_features(migrated_gpu_task, dealer_device); - 
PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); - printf("Dequeue task %s from device queue and schedule\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec)); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); + migrated_gpu_task = mig_task->gpu_task; + dealer_device = mig_task->dealer_device; + starving_device = mig_task->starving_device; + stage_in_status = mig_task->stage_in_status; + change_task_features(migrated_gpu_task, dealer_device, stage_in_status); + + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); + parsec_atomic_fetch_inc_int32(&accounting[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, starving_device->super.device_index); + PARSEC_OBJ_DESTRUCT(mig_task); free(mig_task); return 1; @@ -261,16 +272,16 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) */ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task) { - //parsec_list_chain_sorted(migrated_task_list, (parsec_list_item_t*) mig_task, parsec_execution_context_priority_comparator); - parsec_list_push_back((parsec_list_t*)migrated_task_list, (parsec_list_item_t*)mig_task); + parsec_list_push_back((parsec_list_t*)migrated_task_list, (parsec_list_item_t*)mig_task); - char tmp[MAX_TASK_STRLEN]; parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; - parsec_device_gpu_module_t* dealer_device = mig_task->dealer_device; parsec_device_gpu_module_t* starving_device = mig_task->starving_device; - printf("Enqueue task %s to device queue %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, + + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); + (void)es; return 0; } @@ -285,12 +296,11 @@ int 
parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) { - int starving_device_index = -1, dealer_device_index = 0, dealer_task_count = 0; - int half = 0, nb_migrated = 0; + int starving_device_index = -1, dealer_device_index = 0; + int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t* starving_device = NULL; migrated_task_t *mig_task = NULL; - char tmp[128]; dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); if(is_starving(dealer_device_index)) @@ -301,107 +311,200 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return 0; starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - do + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( &(dealer_device->pending) ); //level 0 + execution_level = 0; + if(migrated_gpu_task == NULL) { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(dealer_device->pending) ); - if(migrated_gpu_task != NULL) + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( dealer_device->exec_stream[0]->fifo_pending ); //level 1 + execution_level = 1; + if( migrated_gpu_task == NULL) { - /** - * @brief if the GPU task is a not a computational kerenel - * stop migration. 
- */ - if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL) + for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) { - parsec_list_push_front(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task); - return nb_migrated; + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 + if(migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } } - - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); - nb_migrated++; - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - printf("Task %s migrated from device %d to device %d: nb_migrated %d\n", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), dealer_device_index, starving_device_index, nb_migrated); - - //change_task_features(migrated_gpu_task, dealer_device); - mig_task = (migrated_task_t *) malloc(sizeof(migrated_task_t)); - mig_task->gpu_task = migrated_gpu_task; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = starving_device; - - parsec_cuda_mig_task_enqueue(es, mig_task); } - else - break; + } + - half++; - }while(half < (dealer_task_count / 2) ); + if(migrated_gpu_task != NULL) + { + assert(migrated_gpu_task->ec != NULL); + parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); + /** + * @brief if the GPU task is a not a computational kerenel + * stop migration. 
+ */ + if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL) + { + if(execution_level == 0) + { + parsec_list_push_front(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task ); + } + if(execution_level == 1) + { + parsec_list_push_front( dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); + } + if(execution_level == 2) + { + parsec_list_push_front( dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); + } + + return nb_migrated; + } + assert( (migrated_gpu_task != NULL) && (migrated_gpu_task->ec != NULL) ); + + if(execution_level == 0) + accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level0++; + if(execution_level == 1) + accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level1++; + if(execution_level == 2) + accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level2++; + nb_migrated++; + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device + + mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + mig_task->gpu_task = migrated_gpu_task; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + mig_task->stage_in_status = (execution_level == 2) ? 
/* stage_in complete */ 1 : 0; + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); + parsec_cuda_mig_task_enqueue(es, mig_task); + + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); + } + + migrated_gpu_task = NULL; return nb_migrated; } -int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device) +int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, + int stage_in_status) { int i = 0; parsec_task_t *task = gpu_task->ec; - char tmp[128]; + parsec_data_copy_t *src_copy = NULL; for(i = 0; i < task->task_class->nb_flows; i++) { if (task->data[i].data_out == NULL) continue; + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + + if( stage_in_status == 1) + { + parsec_data_t* original = task->data[i].data_out->original; + parsec_atomic_lock( &original->lock ); + + assert(task->data[i].data_in->original == task->data[i].data_out->original); + if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) + assert( task->data[i].data_out->version == task->data[i].data_in->version); + + src_copy = task->data[i].data_out; + + assert(task->data[i].data_out != NULL); + assert(original->device_copies[dealer_device->super.device_index]!= NULL); + assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); + assert(src_copy->readers >= 0); - if(task->data[i].data_out->original->owner_device == dealer_device->super.device_index) - { - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow - continue; - if(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) - { 
- printf("%s: has WRITE permission on copy %p [reader = %d], in GPU[%s] with Coherency %d \n", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - task->data[i].data_out, task->data[i].data_out->readers, - dealer_device->super.name, task->data[i].data_out->coherency_state); - printf("NEW permissions %d OLD permissions %d\n", gpu_task->flow[i]->flow_flags, - gpu_task->flow[i]->flow_flags ^ PARSEC_FLOW_ACCESS_WRITE); - } - if(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) - printf("%s: has READ permission on copy %p [reader = %d], in GPU[%s] with Coherency %d \n", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - task->data[i].data_out, task->data[i].data_out->readers, - dealer_device->super.name, task->data[i].data_out->coherency_state); - - //parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( task->data[i].data_out->device_index ); - //printf("%s: %s (%s) can directly do D2D from devices with masks %d \n", - // parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - // dealer_device->super.name,(&in_elem_dev->super)->super.name, - // dealer_device->peer_access_mask); + assert( task->data[i].data_out->version == task->data[i].data_in->version); + + if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) + assert(task->data[i].data_out->version == task->data[i].data_in->version); - printf("%s: possible D2D from cuda device %d for flow %d (from copy %p) \n", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - ((parsec_device_cuda_module_t* )dealer_device)->cuda_index, - gpu_task->flow[i]->flow_index, task->data[i].data_out); - task->data[i].data_in = task->data[i].data_out; - if(task->data[i].data_in->device_index != task->data[i].data_in->original->owner_device) + //compensate for the reader incremented during the first stage_in + if( 
(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) + && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) { - printf("There is something wrong!! device index is %d instead of %d \n", - task->data[i].data_in->device_index, - task->data[i].data_in->original->owner_device); - - task->data[i].data_in->device_index = dealer_device->super.device_index; + assert( task->data[i].data_out->readers >= 0 ); } + else if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) + && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) + { + parsec_atomic_fetch_dec_int32(&task->data[i].data_out->readers); + parsec_list_item_ring_chop((parsec_list_item_t*)src_copy); + PARSEC_LIST_ITEM_SINGLETON(src_copy); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); + } + else if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) + && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) + { + parsec_list_item_ring_chop((parsec_list_item_t*)src_copy); + PARSEC_LIST_ITEM_SINGLETON(src_copy); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); + } + + src_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + src_copy->data_transfer_status = PARSEC_DATA_STATUS_SHOULD_MIGRATE; + + parsec_atomic_unlock( &original->lock ); + } + else + { + /** + * The data GPU will be the owner of the data, only if the task executing on that GPU + * has a write permission on it. 
(data.c line 423) + */ + if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) + continue; + if( task->data[i].data_in->version == task->data[i].data_out->version) + continue; + + task->data[i].data_in = task->data[i].data_out; parsec_data_t* original = task->data[i].data_in->original; + parsec_atomic_lock( &original->lock ); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_atomic_lock( &original->lock ); - if(task->data[i].data_in->readers < 0) - { - printf("There is something wrong!! reader is negative \n"); - task->data[i].data_in->readers = 0; - } - parsec_atomic_fetch_inc_int32(&task->data[i].data_in->readers); + + assert(task->data[i].data_in->readers >= 0); + task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; task->data[i].data_in->data_transfer_status = PARSEC_DATA_STATUS_SHOULD_MIGRATE; - parsec_atomic_unlock( &original->lock ); + + parsec_atomic_unlock( &original->lock ); + } + } + return 0; +} + +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task) +{ + int i; + parsec_task_t *task = gpu_task->ec; + + for(i = 0; i < task->task_class->nb_flows; i++) + { + if (task->data[i].data_out == NULL) + continue; + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + + if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) + && (gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH) ) + { + parsec_gpu_data_copy_t* gpu_elem = task->data[i].data_out; + gpu_elem->version++; /* on to the next version */ + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]: GPU copy %p [ref_count %d] increments version to %d at %s:%d", + gpu_device->super.name, + gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->version, + __FILE__, __LINE__); } } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h 
index 4a65f1ac4..d7add673c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -41,7 +41,7 @@ typedef struct migrated_task_s parsec_gpu_task_t* gpu_task; parsec_device_gpu_module_t* dealer_device; parsec_device_gpu_module_t* starving_device; - + int stage_in_status; } migrated_task_t; typedef struct migration_accounting_s @@ -50,6 +50,7 @@ typedef struct migration_accounting_s int level0; int level1; int level2; + int received; } migration_accounting_t; int parsec_cuda_migrate_init(int ndevices); @@ -72,6 +73,9 @@ int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* g int increment_readers(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device); int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, parsec_device_gpu_module_t* dest_dev); +int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, + int stage_in_status); +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index ebe231b62..a4b9bf8be 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -816,7 +816,7 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - assert(0); + //assert(0); } #endif return PARSEC_SUCCESS; @@ -1196,17 +1196,10 @@ parsec_default_cuda_stage_in(parsec_gpu_task_t *gtask, for(i = 0; i < task->task_class->nb_flows; i++){ - if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("%s: Stage_in called for D2D copy from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), - 
((parsec_device_cuda_module_t* )parsec_mca_device_get( task->data[i].data_in->device_index))->cuda_index, - task->data[i].data_in, - ((parsec_device_cuda_module_t* )parsec_mca_device_get(task->data[i].data_out->device_index))->cuda_index, - task->data[i].data_out); - if(flow_mask & (1U << i)){ copy_in = task->data[i].data_in; copy_out = task->data[i].data_out; + in_elem_dev = parsec_mca_device_get( copy_in->device_index); count = (copy_in->original->nb_elts <= copy_out->original->nb_elts) ? copy_in->original->nb_elts : copy_out->original->nb_elts; @@ -1217,25 +1210,7 @@ parsec_default_cuda_stage_in(parsec_gpu_task_t *gtask, cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, cuda_stream->cuda_stream ); PARSEC_CUDA_CHECK_ERROR( "cudaMemcpyAsync ", ret, { return PARSEC_ERROR; } ); - - //printf("copy_in Transfer status = %d \n", copy_in->data_transfer_status); - - //if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("%s: D2D copy successfull from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), - ((parsec_device_cuda_module_t* )in_elem_dev)->cuda_index, - copy_in, - ((parsec_device_cuda_module_t* )parsec_mca_device_get(copy_out->device_index))->cuda_index, - copy_out); } - - if(copy_in->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("%s: D2D copy successfull from cuda device %d (from copy %p) to cuda device %d (to copy %p) ", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gtask)->ec), - ((parsec_device_cuda_module_t* )in_elem_dev)->cuda_index, - copy_in, - ((parsec_device_cuda_module_t* )parsec_mca_device_get(copy_out->device_index))->cuda_index, - copy_out); } return PARSEC_SUCCESS; } @@ -1321,14 +1296,19 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, */ if( PARSEC_FLOW_ACCESS_WRITE & type ) { if (gpu_elem->readers > 0 ) { - if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ 
& type)) ) { - parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " - "(possible anti-dependency,\n" - "or concurrent accesses), please prevent that with CTL dependencies\n", - gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); - parsec_atomic_unlock( &original->lock ); - return -1; - } + //we are migrating the data of task that has already been staged in. + // So we have incremented the reader for this data in change_task_features(). + //if(gpu_elem->data_transfer_status != PARSEC_DATA_STATUS_SHOULD_MIGRATE) + //{ + // if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { + // parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " + // "(possible anti-dependency,\n" + // "or concurrent accesses), please prevent that with CTL dependencies\n", + // gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); + // parsec_atomic_unlock( &original->lock ); + // return -1; + // } + //} } PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tDetach writable CUDA copy %p [ref_count %d] from any lists", @@ -1344,18 +1324,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) || /* check if this data belongs to a migrating task*/ in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) { - if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - { - //printf("%s: D2D from device %s to device %s for flow %d (from copy %p) \n", - // parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task)->ec), - // gpu_device->super.name, (&in_elem_dev->super)->super.name, flow->flow_index, - // in_elem); - - printf("%s: D2D from cuda device %d to cuda device %d for flow %d (from copy %p) \n", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t 
*)gpu_task)->ec), - in_elem_dev->cuda_index, ((parsec_device_cuda_module_t* )gpu_device)->cuda_index, - flow->flow_index, in_elem); - } + parsec_data_status_t old_status = in_elem->data_transfer_status; int potential_alt_src = 0; if( PARSEC_DEV_CUDA == in_elem_dev->super.super.type ) { @@ -1375,7 +1344,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( PARSEC_DEV_CUDA != target->super.super.type ) continue; if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { parsec_data_copy_t *candidate = original->device_copies[t]; - if( NULL != candidate && candidate->version == in_elem->version ) { + + if( (NULL != candidate && candidate->version == in_elem->version) + /* || (NULL != candidate && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) */) + { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is a potential alternative source for in_elem %p on data %p", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, in_elem, original); @@ -1402,6 +1374,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; + goto src_selected; } } @@ -1429,7 +1402,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, transfer_from = parsec_data_start_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); if( PARSEC_FLOW_ACCESS_WRITE & type && gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH ) { - gpu_elem->version++; /* on to the next version */ + //gpu_elem->version++; /* on to the next version */ + gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] increments version to %d at %s:%d", gpu_device->super.name, @@ -1437,12 +1411,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, __FILE__, __LINE__); } 
- if(transfer_from == -1 && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - { - printf("Somethinng is wrong!! Transfer is already complete \n"); - transfer_from = 0; - } - /* If data is from NEW, as we skip NULL */ if( NULL == task_data->source_repo_entry ) transfer_from = -1; @@ -1475,7 +1443,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, in_elem_dev->super.super.device_index, in_elem->version, (void*)in_elem->device_private, in_elem, in_elem->super.super.obj_reference_count, gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); - assert((gpu_elem->version < in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + assert((gpu_elem->version <= in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); #if defined(PARSEC_PROF_TRACE) @@ -1524,11 +1492,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, #endif /* Push data into the GPU from the source device */ - if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("At the doorstep (copy %p) of D2D copy !! \n", in_elem); - if(gpu_task->stage_in == NULL && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) - printf("Something is wrong!! Stage_in is NULL"); - if(PARSEC_SUCCESS != (gpu_task->stage_in ? 
gpu_task->stage_in(gpu_task, (1U << flow->flow_index), gpu_stream): PARSEC_SUCCESS)) { parsec_warning( "%s:%d %s", __FILE__, __LINE__, "gpu_task->stage_in"); @@ -1536,20 +1499,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_warning("<<%p>> -> <<%p on CUDA device %d>> [%d, H2D]", in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, nb_elts); - if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("<<%p>> -> <<%p on CUDA device %d>> [%d, H2D]", - in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, - nb_elts); } else { parsec_warning("<<%p on CUDA device %d>> -> <<%p on CUDA device %d>> [%d, D2D]", in_elem->device_private, in_elem_dev->cuda_index, gpu_elem->device_private, cuda_device->cuda_index, nb_elts); - if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) - printf("<<%p on CUDA device %d>> -> <<%p on CUDA device %d>> [%d, D2D]", - in_elem->device_private, in_elem_dev->cuda_index, - gpu_elem->device_private, cuda_device->cuda_index, - nb_elts); } parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) @@ -1565,9 +1519,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( PARSEC_GPU_TASK_TYPE_KERNEL == gpu_task->task_type ) gpu_device->super.nb_data_faults += nb_elts; - /* update the data version in GPU immediately, and mark the data under transfer */ - assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); + ///* update the data version in GPU immediately, and mark the data under transfer */ + //assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + // || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, 
parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", @@ -1587,10 +1541,20 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_fetch_dec_int32( &in_elem->readers ); assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); + //if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE && + // gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ) if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) { - parsec_atomic_fetch_dec_int32( &in_elem->readers ); in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; + parsec_device_gpu_module_t *in_elem_dev = + (parsec_device_gpu_module_t *)parsec_mca_device_get( in_elem->device_index); + if( ((parsec_device_module_t *)in_elem_dev)->type == PARSEC_DEV_CUDA) + { + parsec_atomic_fetch_dec_int32( &in_elem->readers ); + //parsec_list_item_ring_chop((parsec_list_item_t*)in_elem); + //PARSEC_LIST_ITEM_SINGLETON(in_elem); + //parsec_list_push_back(&in_elem_dev->gpu_mem_lru, (parsec_list_item_t*)in_elem); + } } parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); @@ -1976,14 +1940,14 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, (NULL != task->data[i].data_out->push_task) ? parsec_task_snprintf(task_str2, MAX_TASK_STRLEN, task->data[i].data_out->push_task) : "(null)", (task->data[i].data_out->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER) ? "all is good" : "Assertion", task->data[i].data_out->data_transfer_status); - assert(task->data[i].data_out->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER); - if( task->data[i].data_out->data_transfer_status != PARSEC_DATA_STATUS_COMPLETE_TRANSFER ) { /* data is not ready */ - /** - * As long as we have only one stream to push the data on the GPU we should never - * end up in this case. 
Remove previous assert if changed. - */ - return -1; - } + //assert(task->data[i].data_out->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER); + //if( task->data[i].data_out->data_transfer_status != PARSEC_DATA_STATUS_COMPLETE_TRANSFER ) { /* data is not ready */ + // /** + // * As long as we have only one stream to push the data on the GPU we should never + // * end up in this case. Remove previous assert if changed. + // */ + // return -1; + //} } gtask->complete_stage = NULL; @@ -2108,6 +2072,13 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if ( NULL == upstream_progress_fct ) { /* Grab the submit function */ progress_fct = task->submit; + + /** + * The change in data versioning is moved from stage_in step + * to here. This will make sure that tasks update the version + * only if it is sure to execute in the corresponding GPU. + */ + gpu_data_version_increment(task); #if defined(PARSEC_DEBUG_PARANOID) int i; const parsec_flow_t *flow; @@ -2417,7 +2388,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { - assert( 0 == gpu_copy->readers ); + // assert( 0 == gpu_copy->readers ); } } parsec_atomic_unlock(&original->lock); @@ -2510,7 +2481,7 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; - assert( 0 == gpu_copy->readers ); + //assert( 0 == gpu_copy->readers ); if( gpu_task->pushout & (1 << i) ) { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, @@ -2691,22 +2662,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, gpu_task), gpu_task->ec->priority ); } - - //if(which_gpu == 2) //CHANGE THIS, only for testing - //{ - // printf("Immediatly Migrate task from device %d \n", which_gpu-2); - // if(migrate_immediate(es, gpu_device, gpu_task)) - // { - // rc = 
parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); - // return PARSEC_HOOK_RETURN_ASYNC; - // } - //} rc = progress_stream( gpu_device, gpu_device->exec_stream[0], parsec_cuda_kernel_push, gpu_task, &progress_task ); if( rc < 0 ) { /* In case of error progress_task is the task that raised it */ + if( -1 == rc ) goto disable_gpu; /* We are in the early stages, and if there no room on the GPU for a task we need to diff --git a/tests/dsl/ptg/cuda/CMakeLists.txt b/tests/dsl/ptg/cuda/CMakeLists.txt index 19cc68d02..29f4fd816 100644 --- a/tests/dsl/ptg/cuda/CMakeLists.txt +++ b/tests/dsl/ptg/cuda/CMakeLists.txt @@ -19,7 +19,9 @@ if(PARSEC_HAVE_CUDA) target_ptg_sources(stage PRIVATE "stage_custom.jdf") target_link_libraries(stage PRIVATE CUDA::cublas) - parsec_addtest_executable(C HelloWorldCuda ) + find_package(CUDAToolkit) + enable_language(CUDA) + parsec_addtest_executable(C HelloWorldCuda SOURCES HelloWorldCuda_kernel.cu ) target_include_directories(HelloWorldCuda PRIVATE $<$:${CMAKE_CURRENT_SOURCE_DIR}>) target_ptg_sources(HelloWorldCuda PRIVATE "HelloWorldCuda.jdf") target_link_libraries(HelloWorldCuda PRIVATE CUDA::cublas) diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf index b342e2ad3..e5ba26c5d 100644 --- a/tests/dsl/ptg/cuda/HelloWorldCuda.jdf +++ b/tests/dsl/ptg/cuda/HelloWorldCuda.jdf @@ -21,21 +21,20 @@ extern "C" %{ #include #endif /* defined(PARSEC_HAVE_CUDA) */ +void HelloWorld_cuda_kernel(double *A_double, int k); %} -NT [ type="int" ] -NL [ type="int" ] - +dcA [type = "parsec_matrix_block_cyclic_t*"] Start(k) k = 0 .. 0 -: taskdist( k ) +: dcA(0, 0 ) -WRITE A <- NEW - -> A HelloWorldCuda( 1 .. NT ) +RW A <- dcA(0, 0 ) + -> A HelloWorldCuda( 1 ) BODY { @@ -47,82 +46,28 @@ END HelloWorldCuda(k) -k = 1 .. NT +k = 1 .. 20 -: taskdist( k ) +: dcA(0, 0 ) -READ A <- A Start( 0 ) +RW A <- ( k == 1) ? A Start( 0 ) : A HelloWorldCuda(k-1) + -> ( k < 20) ? 
A HelloWorldCuda(k+1) : dcA(0, 0 ) BODY [type=CUDA weight=1] { - int a[2048]; - int i, j; - for( i = 0; i < NL; i++ ) - for( j = 1; j < 2048; j++ ) - a[i] = a[i] + a[i-1]; - - - printf("HelloWorldCuda on GPU \n"); + HelloWorld_cuda_kernel((double *) A, k); } END -BODY -{ - int i, j, a[100]; - for( i = 0; i < NL; i++ ) - for( j = 1; j < 100; j++ ) - a[i] = a[i] + a[i-1]; - - printf("HelloWorldCuda is running on a CPU, which is not the intended behavior \n"); -} -END extern "C" %{ - -static uint32_t -rank_of(parsec_data_collection_t *desc, ...) -{ - (void)desc; - return 0; -} - -static int32_t -vpid_of(parsec_data_collection_t *desc, ...) -{ - (void)desc; - return 0; -} - -static parsec_data_key_t -data_key(parsec_data_collection_t *desc, ...) -{ - int k; - va_list ap; - (void)desc; - va_start(ap, desc); - k = va_arg(ap, int); - va_end(ap); - return (uint64_t)k; -} int main(int argc, char *argv[]) { parsec_context_t* parsec; - int rc, rank, world, NT = 10, NL = 1; - parsec_data_collection_t taskdist; + int i, rc, rank, world; parsec_HelloWorldCuda_taskpool_t *tp; - - if(argc < 3) - { - printf("Usasge example: HelloWorldCuda [#tasks] [#loops per task]"); - exit(0); - } - else - { - NT = atoi(argv[1]); - NL = atoi(argv[2]); - } - + parsec_matrix_block_cyclic_t *dcA; #if defined(PARSEC_HAVE_MPI) { @@ -132,35 +77,48 @@ int main(int argc, char *argv[]) MPI_Comm_size(MPI_COMM_WORLD, &world); MPI_Comm_rank(MPI_COMM_WORLD, &rank); #else - world = 1; - rank = 0; + printf("Works only with MPI!!!); + exit(0); #endif parsec = parsec_init(-1, &argc, &argv); + dcA = (parsec_matrix_block_cyclic_t*)calloc(1, sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(dcA, + PARSEC_MATRIX_DOUBLE, + PARSEC_MATRIX_TILE, + 0, + 100, 100, /* Tile size */ + 100, 100, /* Global matrix size (what is stored)*/ + 0, 0, /* Staring point in the global matrix */ + 100, 100, /* Submatrix size (the one concerned by the computation */ + 1, 1, /* process process grid */ + 1, 1, /* 
k-cyclicity */ + 0, 0); /* starting point on the process grid */ + + dcA->mat = parsec_data_allocate((size_t)dcA->super.nb_local_tiles * + (size_t)dcA->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(dcA->super.mtype)); + + parsec_data_collection_set_key((parsec_data_collection_t*)dcA, "A"); + + for(i = 0; i < dcA->super.nb_local_tiles * dcA->super.mb * dcA->super.nb; i++) + ((double*)dcA->mat)[i] = 0; - parsec_data_collection_init(&taskdist, world, rank); - taskdist.rank_of = rank_of; - taskdist.vpid_of = vpid_of; - taskdist.data_key = data_key; + tp = parsec_HelloWorldCuda_new(dcA); - tp = parsec_HelloWorldCuda_new(&taskdist, NT, NL); + parsec_add2arena( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, dcA->super.mb, dcA->super.nb, dcA->super.mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); - //parsec_add2arena( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], - // parsec_datatype_double_complex_t, - // PARSEC_MATRIX_FULL, 1, 10, 10, 10, - // PARSEC_ARENA_ALIGNMENT_SSE, -1 ); - - parsec_arena_datatype_construct( &tp->arenas_datatypes[PARSEC_HelloWorldCuda_DEFAULT_ADT_IDX], - sizeof(int), PARSEC_ARENA_ALIGNMENT_SSE, - parsec_datatype_int_t ); rc = parsec_context_add_taskpool( parsec, &tp->super ); rc = parsec_context_start(parsec); rc = parsec_context_wait(parsec); - parsec_taskpool_free(&tp->super); - parsec_data_collection_destroy(&taskdist); + free(dcA->mat); + parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)dcA); parsec_fini(&parsec); #if defined(PARSEC_HAVE_MPI) diff --git a/tests/dsl/ptg/cuda/HelloWorldCuda_kernel.cu b/tests/dsl/ptg/cuda/HelloWorldCuda_kernel.cu new file mode 100644 index 000000000..16ab31217 --- /dev/null +++ b/tests/dsl/ptg/cuda/HelloWorldCuda_kernel.cu @@ -0,0 +1,18 @@ +#include + +extern "C" +{ + void HelloWorld_cuda_kernel(double *A_double, int k); +} + +__global__ void cuda_kernel(double *A_double, int k) +{ + int i; + for( i = 0; i < 100; i++ ) + 
*(A_double+i) = *(A_double+i) + k; +} + +void HelloWorld_cuda_kernel(double *A_double, int k) +{ + cuda_kernel<<<1,1>>>(A_double, k); +} \ No newline at end of file From 2545cec7306ad0129ec24ca6d36dbb5b1abfc1c6 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 20 May 2022 07:17:57 +1000 Subject: [PATCH 045/215] assert disabled as there are multiple pushes of data --- parsec/mca/device/cuda/device_cuda_module.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index a4b9bf8be..0e03b64ba 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1345,8 +1345,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { parsec_data_copy_t *candidate = original->device_copies[t]; - if( (NULL != candidate && candidate->version == in_elem->version) - /* || (NULL != candidate && in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) */) + if( (NULL != candidate && candidate->version == in_elem->version) ) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is a potential alternative source for in_elem %p on data %p", @@ -1520,8 +1519,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.nb_data_faults += nb_elts; ///* update the data version in GPU immediately, and mark the data under transfer */ - //assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - // || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); + assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); gpu_elem->version = in_elem->version;
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", From e7d77f395776b1a0861b95fd473ceaed698fc65d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 20 May 2022 08:27:16 +1000 Subject: [PATCH 046/215] migrate_status added a member of parsec_gpu_task_s --- parsec/interfaces/ptg/ptg-compiler/jdf2c.c | 1 + parsec/mca/device/device_gpu.h | 1 + 2 files changed, 2 insertions(+) diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index 8549e55a0..ef4afb2d5 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -6840,6 +6840,7 @@ static void jdf_generate_code_hook_cuda(const jdf_t *jdf, string_arena_free(info.sa); coutput(" parsec_device_load[dev_index] += gpu_task->load;\n" + " gpu_task->migrate_status = 0;\n" "\n" " return parsec_cuda_kernel_scheduler( es, gpu_task, dev_index );\n" "}\n\n"); diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index c01e9841c..1a6790d46 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -84,6 +84,7 @@ struct parsec_gpu_task_s { parsec_complete_stage_function_t complete_stage; parsec_stage_in_function_t *stage_in; parsec_stage_out_function_t *stage_out; + int migrate_status; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From 673043384977c138c8a979e859fe07a6f2a15d3d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 20 May 2022 08:35:04 +1000 Subject: [PATCH 047/215] migrate_status checked for operations related to task migration. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 3 ++- parsec/mca/device/cuda/device_cuda_module.c | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e5f16a0ad..771bf7e94 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -371,6 +371,8 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device + migrated_gpu_task->migrate_status = 1; //change migrate_status + mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; @@ -450,7 +452,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } src_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - src_copy->data_transfer_status = PARSEC_DATA_STATUS_SHOULD_MIGRATE; parsec_atomic_unlock( &original->lock ); } diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 0e03b64ba..d9c5df58b 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1298,7 +1298,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if (gpu_elem->readers > 0 ) { //we are migrating the data of task that has already been staged in. // So we have incremented the reader for this data in change_task_features(). 
- //if(gpu_elem->data_transfer_status != PARSEC_DATA_STATUS_SHOULD_MIGRATE) + //if(gpu_task->migrate_status == 1) //{ // if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { // parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " @@ -1322,7 +1322,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * Current limitations: only for read-only data used read-only on the hosting GPU. */ parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) - || /* check if this data belongs to a migrating task*/ in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE ) + || (gpu_task->migrate_status == 1)) { parsec_data_status_t old_status = in_elem->data_transfer_status; @@ -1443,7 +1443,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); assert((gpu_elem->version <= in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); + || (gpu_task->migrate_status == 1)); #if defined(PARSEC_PROF_TRACE) if( gpu_stream->prof_event_track_enable ) { @@ -1520,7 +1520,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, ///* update the data version in GPU immediately, and mark the data under transfer */ assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE)); + || (gpu_task->migrate_status == 1)); gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", @@ -1540,9 +1540,7 @@ 
parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_fetch_dec_int32( &in_elem->readers ); assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); - //if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE && - // gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ) - if(in_elem->data_transfer_status == PARSEC_DATA_STATUS_SHOULD_MIGRATE) + if(gpu_task->migrate_status == 1) { in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; parsec_device_gpu_module_t *in_elem_dev = From cb7ec1f1e8698ea73f967d71a04b9fa616f45261 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 21 May 2022 00:33:45 +1000 Subject: [PATCH 048/215] DTD GEMM test disabled due to cblas header file problems --- tests/dsl/dtd/CMakeLists.txt | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index a5c5a29ae..90e5881a2 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -21,16 +21,4 @@ parsec_addtest_executable(C dtd_test_global_id_for_dc_assumed SOURCES dtd_test_g parsec_addtest_executable(C dtd_test_explicit_task_creation SOURCES dtd_test_explicit_task_creation.c) parsec_addtest_executable(C dtd_test_tp_enqueue_dequeue SOURCES dtd_test_tp_enqueue_dequeue.c) parsec_addtest_executable(C dtd_test_interleave_actions SOURCES dtd_test_interleave_actions.c) -if( PARSEC_HAVE_CUDA ) - parsec_addtest_executable(C dtd_test_cuda_task_insert SOURCES dtd_test_cuda_task_insert.c) - if( TARGET CUDA::cublas ) - parsec_addtest_executable(C dtd_bench_simple_gemm SOURCES dtd_bench_simple_gemm.c) - target_link_libraries(dtd_bench_simple_gemm PRIVATE CUDA::cublas m) - find_package(BLAS) - if(BLAS_FOUND) - target_link_libraries(dtd_bench_simple_gemm PRIVATE ${BLAS_LIBRARIES}) - target_include_directories(dtd_bench_simple_gemm PRIVATE ${BLAS_INCLUDE_DIRS}) - target_compile_definitions(dtd_bench_simple_gemm 
PRIVATE HAVE_BLAS=1) - endif(BLAS_FOUND) - endif( TARGET CUDA::cublas ) -endif() + From 29f42725d8b1b26a02171784afaedb8482d985d0 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 21 May 2022 02:09:12 +1000 Subject: [PATCH 049/215] 3 status added to a GPU task. Conditions changed to reflect this change --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++++-- parsec/mca/device/cuda/device_cuda_migrate.h | 4 ++++ parsec/mca/device/cuda/device_cuda_module.c | 19 +++++++++++++------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 771bf7e94..db9a66b41 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -243,9 +243,9 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) if(mig_task != NULL) { - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); migrated_gpu_task = mig_task->gpu_task; + assert( migrated_gpu_task->migrate_status != TASK_NOT_MIGRATED ); dealer_device = mig_task->dealer_device; starving_device = mig_task->starving_device; stage_in_status = mig_task->stage_in_status; @@ -371,7 +371,11 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - migrated_gpu_task->migrate_status = 1; //change migrate_status + //change migrate_status + if( execution_level == 2 ) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index d7add673c..525de26d1 100644 --- 
a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -18,6 +18,10 @@ #define PARSEC_DATA_STATUS_UNDER_MIGRATION ((parsec_data_coherency_t)0x4) #define PARSEC_DATA_STATUS_MIGRATION_COMPLETE ((parsec_data_coherency_t)0x5) +#define TASK_NOT_MIGRATED 0 +#define TASK_MIGRATED_BEFORE_STAGE_IN 1 +#define TASK_MIGRATED_AFTER_STAGE_IN 2 + /** * @brief * level 0 - task has been enqueued to the pending queue of the device. It has not been progressed. diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d9c5df58b..18d5b3e59 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -869,6 +869,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, { parsec_task_t *this_task = gpu_task->ec; parsec_gpu_data_copy_t* temp_loc[MAX_PARAM_COUNT], *gpu_elem, *lru_gpu_elem; + parsec_gpu_data_copy_t* old_data; parsec_data_t* master, *oldmaster; const parsec_flow_t *flow; int i, j, data_avail_epoch = 0; @@ -902,6 +903,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, master = this_task->data[i].data_in->original; parsec_atomic_lock(&master->lock); gpu_elem = PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index); + old_data = this_task->data[i].data_out; this_task->data[i].data_out = gpu_elem; /* There is already a copy on the device */ @@ -920,6 +922,11 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock(&master->lock); return PARSEC_HOOK_RETURN_AGAIN; } + + //compensate for the reader incremeneted during the first strage_in + if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + old_data->readers--; + parsec_atomic_unlock(&master->lock); continue; } @@ -1298,7 +1305,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if (gpu_elem->readers > 0 ) { //we are migrating the data of task 
that has already been staged in. // So we have incremented the reader for this data in change_task_features(). - //if(gpu_task->migrate_status == 1) + //if(gpu_task->migrate_status > TASK_NOT_MIGRATED) //{ // if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { // parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " @@ -1322,7 +1329,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * Current limitations: only for read-only data used read-only on the hosting GPU. */ parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) - || (gpu_task->migrate_status == 1)) + || (gpu_task->migrate_status > TASK_NOT_MIGRATED)) { parsec_data_status_t old_status = in_elem->data_transfer_status; @@ -1443,7 +1450,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); assert((gpu_elem->version <= in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (gpu_task->migrate_status == 1)); + || (gpu_task->migrate_status > TASK_NOT_MIGRATED)); #if defined(PARSEC_PROF_TRACE) if( gpu_stream->prof_event_track_enable ) { @@ -1520,7 +1527,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, ///* update the data version in GPU immediately, and mark the data under transfer */ assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (gpu_task->migrate_status == 1)); + || (gpu_task->migrate_status > TASK_NOT_MIGRATED)); gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", @@ -1540,7 +1547,7 @@ parsec_gpu_data_stage_in( 
parsec_device_cuda_module_t* cuda_device, parsec_atomic_fetch_dec_int32( &in_elem->readers ); assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); - if(gpu_task->migrate_status == 1) + if(gpu_task->migrate_status != TASK_NOT_MIGRATED) { in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; parsec_device_gpu_module_t *in_elem_dev = @@ -2478,7 +2485,7 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; - //assert( 0 == gpu_copy->readers ); + assert( 0 == gpu_copy->readers ); if( gpu_task->pushout & (1 << i) ) { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, From f74e3ae8c8ca6a38f6b4195daf69d6b04db20217 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 21 May 2022 02:42:18 +1000 Subject: [PATCH 050/215] increment corrected. We were decrementing the reader when we should have been incrementing --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index db9a66b41..3e65d44ae 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -442,7 +442,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t else if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) { - parsec_atomic_fetch_dec_int32(&task->data[i].data_out->readers); + parsec_atomic_fetch_inc_int32(&task->data[i].data_out->readers); parsec_list_item_ring_chop((parsec_list_item_t*)src_copy); PARSEC_LIST_ITEM_SINGLETON(src_copy); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); From 7307854dfd7935c702d22af12dd356d78c7ce914 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 23 May 2022 23:54:36 +1000 Subject: [PATCH 051/215] hash table used to track the memory of
migrated data. When a task is migrated, its old data is tracked using a hashtable entry (migrate_hash_table_insert()). When the stage-in of the migrated task completes (the second time), the readers are updated using migrate_hash_table_delete(). --- parsec/mca/device/cuda/device_cuda_migrate.c | 120 +++++++++++++++++-- parsec/mca/device/cuda/device_cuda_migrate.h | 16 +++ parsec/mca/device/cuda/device_cuda_module.c | 36 +++--- 3 files changed, 145 insertions(+), 27 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3e65d44ae..8aec81de1 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -5,6 +5,9 @@ parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; migration_accounting_t* accounting; +static parsec_hash_table_t *migrated_data_hash_table = NULL; + + PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); /** @@ -45,6 +48,12 @@ int parsec_cuda_migrate_init(int ndevices) nvml_ret = nvmlInit_v2(); #endif + migrated_data_hash_table = PARSEC_OBJ_NEW(parsec_hash_table_t); + parsec_hash_table_init(migrated_data_hash_table, + offsetof(migrated_data_t, ht_item), + 8, migrated_data_key_fns, NULL); + + char hostname[256]; gethostname(hostname, sizeof(hostname)); printf("PID %d on %s ready for attach\n", getpid(), hostname); @@ -77,6 +86,8 @@ int parsec_cuda_migrate_fini() PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); + parsec_hash_table_fini(migrated_data_hash_table); + printf("Migration module shut down \n"); return 0; @@ -249,6 +260,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) dealer_device = mig_task->dealer_device; starving_device = mig_task->starving_device; stage_in_status = mig_task->stage_in_status; + change_task_features(migrated_gpu_task, dealer_device, stage_in_status);
PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); @@ -263,6 +275,74 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) return 0; } +int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task ) +{ + int i; + migrated_data_t *migrated_data_item = NULL; + parsec_task_t *task = migrated_gpu_task->ec; + migrated_data_item = (migrated_data_t *) calloc(1, sizeof(migrated_data_t)); + migrated_data_item->ht_item.key = (parsec_key_t) task->task_class->make_key((const parsec_taskpool_t*)task->taskpool, + (const parsec_assignment_t*)&task->locals); + for( i = 0; i < task->task_class->nb_flows; i++) + { + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & migrated_gpu_task->flow[i]->flow_flags)) //CTL flow + { + migrated_data_item->old_copy[i] = NULL; + continue; + } + + if(task->data->data_out == NULL) + migrated_data_item->old_copy[i] = NULL; + else + migrated_data_item->old_copy[i] = task->data->data_out; + } + + parsec_hash_table_lock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); + parsec_hash_table_nolock_insert(migrated_data_hash_table, &migrated_data_item->ht_item); + parsec_hash_table_unlock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); + + return 1; +} + +int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task) +{ + int i; + migrated_data_t* migrated_data_item = NULL; + parsec_task_t* task = migrated_gpu_task->ec; + parsec_key_t key; + + + key = (parsec_key_t) migrated_gpu_task->ec->task_class->make_key((const parsec_taskpool_t*)migrated_gpu_task->ec->taskpool, + (const parsec_assignment_t*)&migrated_gpu_task->ec->locals); + + parsec_hash_table_lock_bucket(migrated_data_hash_table, key); + migrated_data_item = (migrated_data_t*) parsec_hash_table_nolock_remove(migrated_data_hash_table, key); + parsec_hash_table_unlock_bucket(migrated_data_hash_table, key); + + if( migrated_data_item != NULL) + { + if( migrated_gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) 
+ { + for( i = 0; i < task->task_class->nb_flows; i++) + { + if(migrated_data_item->old_copy[i] == NULL) + continue; + + parsec_data_t* original = migrated_data_item->old_copy[i]->original; + parsec_atomic_lock( &original->lock ); + parsec_atomic_fetch_dec_int32( &migrated_data_item->old_copy[i]->readers ); + parsec_atomic_unlock( &original->lock ); + } + } + + free(migrated_data_item); + } + + + return 1; +} + + /** * This function migrate a specific task from a device a * to another. @@ -311,6 +391,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return 0; starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); + #if 0 migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( &(dealer_device->pending) ); //level 0 execution_level = 0; if(migrated_gpu_task == NULL) @@ -331,6 +412,18 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module } } } + #endif + + for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 + if(migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } + } if(migrated_gpu_task != NULL) @@ -382,8 +475,9 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module mig_task->gpu_task = migrated_gpu_task; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; - mig_task->stage_in_status = (execution_level == 2) ? /* stage_in complete */ 1 : 0; + mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); + parsec_cuda_mig_task_enqueue(es, mig_task); char tmp[MAX_TASK_STRLEN]; @@ -410,35 +504,37 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow continue; - if( stage_in_status == 1) + if( stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN ) { parsec_data_t* original = task->data[i].data_out->original; parsec_atomic_lock( &original->lock ); + src_copy = task->data[i].data_out; + assert(task->data[i].data_in->original == task->data[i].data_out->original); if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) assert( task->data[i].data_out->version == task->data[i].data_in->version); - - src_copy = task->data[i].data_out; - assert(task->data[i].data_out != NULL); assert(original->device_copies[dealer_device->super.device_index]!= NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(src_copy->readers >= 0); - - assert( task->data[i].data_out->version == task->data[i].data_in->version); - if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) assert(task->data[i].data_out->version == task->data[i].data_in->version); - //compensate for the reader incremented during the first stage_in + /** + * @brief add all the data to dealer_device->gpu_mem_lru, but make sure + * there are enough readers to ensure the data is not evicted. 
+ */ + + //There is already a reader incremented after first stage_in if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) { assert( task->data[i].data_out->readers >= 0 ); } + // No readers else if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) { @@ -447,6 +543,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(src_copy); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); } + //There is already a reader incremented after first stage_in else if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) { @@ -459,7 +556,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_unlock( &original->lock ); } - else + else { /** * The data GPU will be the owner of the data, only if the task executing on that GPU @@ -485,6 +582,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_unlock( &original->lock ); } } + + migrate_hash_table_insert(gpu_task); + return 0; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 525de26d1..3c3e42941 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -57,6 +57,19 @@ typedef struct migration_accounting_s int received; } migration_accounting_t; +typedef struct migrated_data_s +{ + parsec_hash_table_item_t ht_item; + parsec_data_copy_t *old_copy[MAX_PARAM_COUNT]; +} migrated_data_t; + +static parsec_key_fn_t migrated_data_key_fns = { + .key_equal = parsec_hash_table_generic_64bits_key_equal, + .key_print = parsec_hash_table_generic_64bits_key_print, + .key_hash = parsec_hash_table_generic_64bits_key_hash +}; + + int 
parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); @@ -80,6 +93,9 @@ int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* sr int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); +int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task ); +int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); + diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 18d5b3e59..3419aaeb2 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -923,10 +923,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, return PARSEC_HOOK_RETURN_AGAIN; } - //compensate for the reader incremeneted during the first strage_in - if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - old_data->readers--; - parsec_atomic_unlock(&master->lock); continue; } @@ -1547,19 +1543,19 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_fetch_dec_int32( &in_elem->readers ); assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); - if(gpu_task->migrate_status != TASK_NOT_MIGRATED) - { - in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; - parsec_device_gpu_module_t *in_elem_dev = - (parsec_device_gpu_module_t *)parsec_mca_device_get( in_elem->device_index); - if( ((parsec_device_module_t *)in_elem_dev)->type == PARSEC_DEV_CUDA) - { - parsec_atomic_fetch_dec_int32( &in_elem->readers ); - //parsec_list_item_ring_chop((parsec_list_item_t*)in_elem); - //PARSEC_LIST_ITEM_SINGLETON(in_elem); - //parsec_list_push_back(&in_elem_dev->gpu_mem_lru, (parsec_list_item_t*)in_elem); - } - } + //if(gpu_task->migrate_status != TASK_NOT_MIGRATED) + //{ + // 
in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; + // parsec_device_gpu_module_t *in_elem_dev = + // (parsec_device_gpu_module_t *)parsec_mca_device_get( in_elem->device_index); + // if( ((parsec_device_module_t *)in_elem_dev)->type == PARSEC_DEV_CUDA) + // { + // parsec_atomic_fetch_dec_int32( &in_elem->readers ); + // //parsec_list_item_ring_chop((parsec_list_item_t*)in_elem); + // //PARSEC_LIST_ITEM_SINGLETON(in_elem); + // //parsec_list_push_back(&in_elem_dev->gpu_mem_lru, (parsec_list_item_t*)in_elem); + // } + //} parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); @@ -2227,6 +2223,9 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } + if( gpu_task->migrate_status > TASK_NOT_MIGRATED ) + migrate_hash_table_delete(gpu_task); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", gpu_device->super.name, @@ -2802,6 +2801,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec = NULL; goto remove_gpu_task; } + //if( gpu_task->migrate_status > TASK_NOT_MIGRATED ) + // migrate_hash_table_delete(gpu_task); + parsec_cuda_kernel_epilog( gpu_device, gpu_task ); __parsec_complete_execution( es, gpu_task->ec ); gpu_device->super.executed_tasks++; From 8ef7a270b31d8db6672c429d6f1763aa6c97b356 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 24 May 2022 04:40:42 +1000 Subject: [PATCH 052/215] data copy increment and decrement replaced by macros PARSEC_DATA_COPY_DEC_READERS, PARSEC_DATA_COPY_INC_READERS, PARSEC_DATA_COPY_DEC_READERS_ATOMIC, PARSEC_DATA_COPY_INC_READERS_ATOMIC --- parsec/data.c | 3 +- parsec/data_internal.h | 16 ++++++ parsec/mca/device/cuda/device_cuda_migrate.c | 51 +++++++------------- parsec/mca/device/cuda/device_cuda_migrate.h | 4 +- parsec/mca/device/cuda/device_cuda_module.c | 36 ++++++-------- parsec/mca/device/transfer_gpu.c | 6 ++- 6 files changed, 57 insertions(+), 59 deletions(-) diff --git a/parsec/data.c 
b/parsec/data.c index 0f022c008..4ed0055a9 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -417,7 +417,8 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, assert( (!transfer_required) || (data->device_copies[valid_copy]->version >= copy->version) ); if( PARSEC_FLOW_ACCESS_READ & access_mode ) { - copy->readers++; + //copy->readers++; + PARSEC_DATA_COPY_INC_READERS(copy); } if( PARSEC_FLOW_ACCESS_WRITE & access_mode ) { data->owner_device = (uint8_t)device; diff --git a/parsec/data_internal.h b/parsec/data_internal.h index 11ae65ad6..891d711cc 100644 --- a/parsec/data_internal.h +++ b/parsec/data_internal.h @@ -11,6 +11,22 @@ #ifndef DATA_INTERNAL_H_HAS_BEEN_INCLUDED #define DATA_INTERNAL_H_HAS_BEEN_INCLUDED +#define PARSEC_DATA_COPY_DEC_READERS(COPY) \ + do{ \ + COPY->readers--; \ + assert(COPY->readers >= 0); \ + }while(0); \ + +#define PARSEC_DATA_COPY_INC_READERS(COPY) COPY->readers++; + +#define PARSEC_DATA_COPY_DEC_READERS_ATOMIC(COPY) \ + do{ \ + parsec_atomic_fetch_dec_int32( &COPY->readers ); \ + assert(COPY->readers >= 0); \ + }while(0); \ + +#define PARSEC_DATA_COPY_INC_READERS_ATOMIC(COPY) parsec_atomic_fetch_inc_int32( &COPY->readers ); + /** @addtogroup parsec_internal_data * @{ */ diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 8aec81de1..1fc1fe13f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -275,12 +275,13 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) return 0; } -int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task ) +int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device) { int i; migrated_data_t *migrated_data_item = NULL; parsec_task_t *task = migrated_gpu_task->ec; migrated_data_item = (migrated_data_t *) calloc(1, sizeof(migrated_data_t)); + migrated_data_item->dealer_device = dealer_device;
migrated_data_item->ht_item.key = (parsec_key_t) task->task_class->make_key((const parsec_taskpool_t*)task->taskpool, (const parsec_assignment_t*)&task->locals); for( i = 0; i < task->task_class->nb_flows; i++) @@ -330,7 +331,17 @@ int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task) parsec_data_t* original = migrated_data_item->old_copy[i]->original; parsec_atomic_lock( &original->lock ); - parsec_atomic_fetch_dec_int32( &migrated_data_item->old_copy[i]->readers ); + + if( (PARSEC_FLOW_ACCESS_READ & migrated_gpu_task->flow[i]->flow_flags) ) + { + //parsec_atomic_fetch_dec_int32( &migrated_data_item->old_copy[i]->readers ); + if( migrated_data_item->old_copy[i]->readers > 0 ) + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(migrated_data_item->old_copy[i]); + } + + parsec_list_push_back(&migrated_data_item->dealer_device->gpu_mem_lru, + (parsec_list_item_t*)migrated_data_item->old_copy[i]); + parsec_atomic_unlock( &original->lock ); } } @@ -521,40 +532,14 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert( task->data[i].data_out->version == task->data[i].data_in->version); if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) assert(task->data[i].data_out->version == task->data[i].data_in->version); - - - /** - * @brief add all the data to dealer_device->gpu_mem_lru, but make sure - * there are enough readers to ensure the data is not evicted. 
- */ - - //There is already a reader incremented after first stage_in - if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) - && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) - { - assert( task->data[i].data_out->readers >= 0 ); - } - // No readers - else if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) - && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) - { - parsec_atomic_fetch_inc_int32(&task->data[i].data_out->readers); - parsec_list_item_ring_chop((parsec_list_item_t*)src_copy); - PARSEC_LIST_ITEM_SINGLETON(src_copy); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); - } - //There is already a reader incremented after first stage_in - else if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) - && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) - { - parsec_list_item_ring_chop((parsec_list_item_t*)src_copy); - PARSEC_LIST_ITEM_SINGLETON(src_copy); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)src_copy); - } + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(src_copy); src_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; parsec_atomic_unlock( &original->lock ); + + } else { @@ -583,7 +568,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } } - migrate_hash_table_insert(gpu_task); + migrate_hash_table_insert(gpu_task, dealer_device); return 0; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 3c3e42941..561e89ca2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -60,6 +60,7 @@ typedef struct migration_accounting_s typedef struct migrated_data_s { parsec_hash_table_item_t ht_item; + parsec_device_gpu_module_t* dealer_device; parsec_data_copy_t *old_copy[MAX_PARAM_COUNT]; } migrated_data_t; @@ -87,13 +88,12 @@ int 
migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); -int increment_readers(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device); int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, parsec_device_gpu_module_t* dest_dev); int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); -int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task ); +int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3419aaeb2..d973a7dc1 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -816,7 +816,7 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - //assert(0); + assert(0); } #endif return PARSEC_SUCCESS; @@ -1368,7 +1368,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - parsec_atomic_fetch_inc_int32( &candidate->readers ); + 
//parsec_atomic_fetch_inc_int32( &candidate->readers ); + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + undo_readers_inc_if_no_transfer = 1; /* We swap data_in with candidate, so we update the reference counters */ PARSEC_OBJ_RETAIN(candidate); @@ -1540,23 +1542,12 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, return 1; } if( undo_readers_inc_if_no_transfer ) - parsec_atomic_fetch_dec_int32( &in_elem->readers ); + { + //parsec_atomic_fetch_dec_int32( &in_elem->readers ); + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); + } assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); - //if(gpu_task->migrate_status != TASK_NOT_MIGRATED) - //{ - // in_elem->data_transfer_status = PARSEC_DATA_STATUS_NOT_TRANSFER; - // parsec_device_gpu_module_t *in_elem_dev = - // (parsec_device_gpu_module_t *)parsec_mca_device_get( in_elem->device_index); - // if( ((parsec_device_module_t *)in_elem_dev)->type == PARSEC_DEV_CUDA) - // { - // parsec_atomic_fetch_dec_int32( &in_elem->readers ); - // //parsec_list_item_ring_chop((parsec_list_item_t*)in_elem); - // //PARSEC_LIST_ITEM_SINGLETON(in_elem); - // //parsec_list_push_back(&in_elem_dev->gpu_mem_lru, (parsec_list_item_t*)in_elem); - // } - //} - parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, @@ -1898,7 +1889,8 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, /* Nobody is at the door to handle that event on the source of that data... 
* we do the command directly */ parsec_atomic_lock( &task->data[i].data_in->original->lock ); - task->data[i].data_in->readers--; + //task->data[i].data_in->readers--; + PARSEC_DATA_COPY_DEC_READERS(task->data[i].data_in); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tExecuting D2D transfer complete for copy %p [ref_count %d] for " "device %s -- readers now %d", @@ -1967,7 +1959,8 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, tmp, gpu_copy->readers, gpu_copy->device_index, gpu_copy->version, gpu_copy->flags, gpu_copy->coherency_state, gpu_copy->data_transfer_status); - gpu_copy->readers--; + //gpu_copy->readers--; + PARSEC_DATA_COPY_DEC_READERS(gpu_copy); if( 0 == gpu_copy->readers ) { parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); @@ -2307,7 +2300,8 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, } parsec_atomic_lock(&original->lock); if( flow->flow_flags & PARSEC_FLOW_ACCESS_READ ) { - gpu_copy->readers--; + //gpu_copy->readers--; + PARSEC_DATA_COPY_DEC_READERS(gpu_copy); if( gpu_copy->readers < 0 ) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: While trying to Pop %s, gpu_copy %p [ref_count %d] on flow %d with original %p had already 0 readers", @@ -2391,7 +2385,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { - // assert( 0 == gpu_copy->readers ); + assert( 0 == gpu_copy->readers ); } } parsec_atomic_unlock(&original->lock); diff --git a/parsec/mca/device/transfer_gpu.c b/parsec/mca/device/transfer_gpu.c index 9e5cf5b2e..521417426 100644 --- a/parsec/mca/device/transfer_gpu.c +++ b/parsec/mca/device/transfer_gpu.c @@ -243,7 +243,8 @@ parsec_gpu_create_w2r_task(parsec_device_gpu_module_t *gpu_device, } parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); 
- gpu_copy->readers++; + //gpu_copy->readers++; + PARSEC_DATA_COPY_INC_READERS(gpu_copy); d2h_task->data[nb_cleaned].data_out = gpu_copy; gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_UNDER_TRANSFER; /* mark the copy as in transfer */ parsec_atomic_unlock( &gpu_copy->original->lock ); @@ -295,7 +296,8 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, for( int i = 0; i < task->locals[0].value; i++ ) { gpu_copy = task->data[i].data_out; parsec_atomic_lock(&gpu_copy->original->lock); - gpu_copy->readers--; + //gpu_copy->readers--; + PARSEC_DATA_COPY_DEC_READERS(gpu_copy); gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; gpu_device->super.transferred_data_out += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ assert(gpu_copy->readers >= 0); From 2048d198bfdda4d0583da0b015ba599ef179cfc1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 24 May 2022 23:35:32 +1000 Subject: [PATCH 053/215] all reader manipulations changed to atomic --- parsec/mca/device/cuda/device_cuda_module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d973a7dc1..9ea9b2981 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1890,7 +1890,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, * we do the command directly */ parsec_atomic_lock( &task->data[i].data_in->original->lock ); //task->data[i].data_in->readers--; - PARSEC_DATA_COPY_DEC_READERS(task->data[i].data_in); + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(task->data[i].data_in); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tExecuting D2D transfer complete for copy %p [ref_count %d] for " "device %s -- readers now %d", @@ -1960,7 +1960,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, gpu_copy->readers, 
gpu_copy->device_index, gpu_copy->version, gpu_copy->flags, gpu_copy->coherency_state, gpu_copy->data_transfer_status); //gpu_copy->readers--; - PARSEC_DATA_COPY_DEC_READERS(gpu_copy); + PARSEC_DATA_COPY_DEC_READERS_ATOMOC(gpu_copy); if( 0 == gpu_copy->readers ) { parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); @@ -2301,7 +2301,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_lock(&original->lock); if( flow->flow_flags & PARSEC_FLOW_ACCESS_READ ) { //gpu_copy->readers--; - PARSEC_DATA_COPY_DEC_READERS(gpu_copy); + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(gpu_copy); if( gpu_copy->readers < 0 ) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: While trying to Pop %s, gpu_copy %p [ref_count %d] on flow %d with original %p had already 0 readers", From 11c9c2de98bb6d4353008ff2b98914824e6809e7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 25 May 2022 03:53:48 +1000 Subject: [PATCH 054/215] for tasks with status TASK_MIGRATED_AFTER_STAGE_IN the source data is always found from the staged in data available in data_out. So these kinds of tasks always find the data from the source GPU, unless the data is already available in the destination GPU. If the data is available in the destination GPU, parsec_data_start_transfer_ownership_to_copy function can take care of it. The readers incremented to make sure the data is not evicted are decremented using the function gpu_data_compensate_reader() after the second stage_in.
--- parsec/mca/device/cuda/device_cuda_migrate.c | 211 +++++++++++-------- parsec/mca/device/cuda/device_cuda_migrate.h | 1 + parsec/mca/device/cuda/device_cuda_module.c | 11 +- 3 files changed, 134 insertions(+), 89 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1fc1fe13f..0a76a9e96 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -275,83 +275,6 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) return 0; } -int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device) -{ - int i; - migrated_data_t *migrated_data_item = NULL; - parsec_task_t *task = migrated_gpu_task->ec; - migrated_data_item = (migrated_data_t *) calloc(1, sizeof(migrated_data_t)); - migrated_data_item->dealer_device = dealer_device; - migrated_data_item->ht_item.key = (parsec_key_t) task->task_class->make_key((const parsec_taskpool_t*)task->taskpool, - (const parsec_assignment_t*)&task->locals); - for( i = 0; i < task->task_class->nb_flows; i++) - { - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & migrated_gpu_task->flow[i]->flow_flags)) //CTL flow - { - migrated_data_item->old_copy[i] = NULL; - continue; - } - - if(task->data->data_out == NULL) - migrated_data_item->old_copy[i] = NULL; - else - migrated_data_item->old_copy[i] = task->data->data_out; - } - - parsec_hash_table_lock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); - parsec_hash_table_nolock_insert(migrated_data_hash_table, &migrated_data_item->ht_item); - parsec_hash_table_unlock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); - - return 1; -} - -int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task) -{ - int i; - migrated_data_t* migrated_data_item = NULL; - parsec_task_t* task = migrated_gpu_task->ec; - parsec_key_t key; - - - key = (parsec_key_t) 
migrated_gpu_task->ec->task_class->make_key((const parsec_taskpool_t*)migrated_gpu_task->ec->taskpool, - (const parsec_assignment_t*)&migrated_gpu_task->ec->locals); - - parsec_hash_table_lock_bucket(migrated_data_hash_table, key); - migrated_data_item = (migrated_data_t*) parsec_hash_table_nolock_remove(migrated_data_hash_table, key); - parsec_hash_table_unlock_bucket(migrated_data_hash_table, key); - - if( migrated_data_item != NULL) - { - if( migrated_gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - { - for( i = 0; i < task->task_class->nb_flows; i++) - { - if(migrated_data_item->old_copy[i] == NULL) - continue; - - parsec_data_t* original = migrated_data_item->old_copy[i]->original; - parsec_atomic_lock( &original->lock ); - - if( (PARSEC_FLOW_ACCESS_READ & migrated_gpu_task->flow[i]->flow_flags) ) - { - //parsec_atomic_fetch_dec_int32( &migrated_data_item->old_copy[i]->readers ); - if( migrated_data_item->old_copy[i]->readers > 0 ) - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(migrated_data_item->old_copy[i]); - } - - parsec_list_push_back(&migrated_data_item->dealer_device->gpu_mem_lru, - (parsec_list_item_t*)migrated_data_item->old_copy[i]); - - parsec_atomic_unlock( &original->lock ); - } - } - - free(migrated_data_item); - } - - - return 1; -} /** @@ -520,7 +443,35 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t* original = task->data[i].data_out->original; parsec_atomic_lock( &original->lock ); - src_copy = task->data[i].data_out; + //staged in data is already available it data_out + task->data[i].data_in = task->data[i].data_out; + task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + + /** + * @brief If the task only has write access remove it from owned LRU and add + * it to mem LRU. Increment the reader to make usre the data will + * not be evicted. 
+ */ + if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + } + /** + * @brief For read_write the flow the readers will already be inceremnetd + * but, the data will be in the owned LRU move it to the mem LRU + */ + if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + + } assert(task->data[i].data_in->original == task->data[i].data_out->original); if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) @@ -528,15 +479,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out != NULL); assert(original->device_copies[dealer_device->super.device_index]!= NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); - assert(src_copy->readers >= 0); + assert(task->data[i].data_in->readers >= 0); assert( task->data[i].data_out->version == task->data[i].data_in->version); if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) assert(task->data[i].data_out->version == task->data[i].data_in->version); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(src_copy); - src_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - parsec_atomic_unlock( &original->lock ); @@ -568,7 +515,7 @@ int 
change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } } - migrate_hash_table_insert(gpu_task, dealer_device); + //migrate_hash_table_insert(gpu_task, dealer_device); return 0; } @@ -600,3 +547,99 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task) return 0; } + +int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task) +{ + int i; + parsec_task_t *task = gpu_task->ec; + + for(i = 0; i < task->task_class->nb_flows; i++) + { + if (task->data[i].data_in == NULL) + continue; + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + + PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); + + } + + return 0; +} + + +int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device) +{ + int i; + migrated_data_t *migrated_data_item = NULL; + parsec_task_t *task = migrated_gpu_task->ec; + migrated_data_item = (migrated_data_t *) calloc(1, sizeof(migrated_data_t)); + migrated_data_item->dealer_device = dealer_device; + migrated_data_item->ht_item.key = (parsec_key_t) task->task_class->make_key((const parsec_taskpool_t*)task->taskpool, + (const parsec_assignment_t*)&task->locals); + for( i = 0; i < task->task_class->nb_flows; i++) + { + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & migrated_gpu_task->flow[i]->flow_flags)) //CTL flow + { + migrated_data_item->old_copy[i] = NULL; + continue; + } + + if(task->data->data_out == NULL) + migrated_data_item->old_copy[i] = NULL; + else + migrated_data_item->old_copy[i] = task->data->data_out; + } + + parsec_hash_table_lock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); + parsec_hash_table_nolock_insert(migrated_data_hash_table, &migrated_data_item->ht_item); + parsec_hash_table_unlock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); + + return 1; +} + +int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task) +{ + int i; + 
migrated_data_t* migrated_data_item = NULL; + parsec_task_t* task = migrated_gpu_task->ec; + parsec_key_t key; + + + key = (parsec_key_t) migrated_gpu_task->ec->task_class->make_key((const parsec_taskpool_t*)migrated_gpu_task->ec->taskpool, + (const parsec_assignment_t*)&migrated_gpu_task->ec->locals); + + parsec_hash_table_lock_bucket(migrated_data_hash_table, key); + migrated_data_item = (migrated_data_t*) parsec_hash_table_nolock_remove(migrated_data_hash_table, key); + parsec_hash_table_unlock_bucket(migrated_data_hash_table, key); + + if( migrated_data_item != NULL) + { + if( migrated_gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + { + for( i = 0; i < task->task_class->nb_flows; i++) + { + if(migrated_data_item->old_copy[i] == NULL) + continue; + + parsec_data_t* original = migrated_data_item->old_copy[i]->original; + parsec_atomic_lock( &original->lock ); + + if( (PARSEC_FLOW_ACCESS_READ & migrated_gpu_task->flow[i]->flow_flags) ) + { + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(migrated_data_item->old_copy[i]); + } + + parsec_list_push_back(&migrated_data_item->dealer_device->gpu_mem_lru, + (parsec_list_item_t*)migrated_data_item->old_copy[i]); + + parsec_atomic_unlock( &original->lock ); + } + } + + free(migrated_data_item); + } + + + return 1; +} diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 561e89ca2..3014ed102 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -95,6 +95,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); +int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task); diff --git a/parsec/mca/device/cuda/device_cuda_module.c 
b/parsec/mca/device/cuda/device_cuda_module.c index 9ea9b2981..13c3ee7d0 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -929,6 +929,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) gpu_elem = PARSEC_OBJ_NEW(parsec_data_copy_t); + PARSEC_DATA_COPY_READERS_SET_ZERO(gpu_elem); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:%s: Allocate CUDA copy %p sz %d[ref_count %d] for data %p", gpu_device->super.name, task_name, @@ -1960,7 +1961,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, gpu_copy->readers, gpu_copy->device_index, gpu_copy->version, gpu_copy->flags, gpu_copy->coherency_state, gpu_copy->data_transfer_status); //gpu_copy->readers--; - PARSEC_DATA_COPY_DEC_READERS_ATOMOC(gpu_copy); + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(gpu_copy); if( 0 == gpu_copy->readers ) { parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); @@ -2216,8 +2217,8 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } - if( gpu_task->migrate_status > TASK_NOT_MIGRATED ) - migrate_hash_table_delete(gpu_task); + if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN ) + gpu_data_compensate_reader(gpu_task); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", @@ -2385,7 +2386,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { - assert( 0 == gpu_copy->readers ); + //assert( 0 == gpu_copy->readers ); } } parsec_atomic_unlock(&original->lock); @@ -2478,7 +2479,7 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; - assert( 0 == gpu_copy->readers ); + //assert( 0 == gpu_copy->readers ); if( gpu_task->pushout & (1 << i) ) { 
PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, From 1887275a60c05b1713bda3a4e5c59a5be0fb161c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 25 May 2022 04:22:10 +1000 Subject: [PATCH 055/215] reader macros updated --- parsec/data_internal.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/parsec/data_internal.h b/parsec/data_internal.h index 891d711cc..1eb0b93cf 100644 --- a/parsec/data_internal.h +++ b/parsec/data_internal.h @@ -17,7 +17,11 @@ assert(COPY->readers >= 0); \ }while(0); \ -#define PARSEC_DATA_COPY_INC_READERS(COPY) COPY->readers++; +#define PARSEC_DATA_COPY_INC_READERS(COPY) \ + do{ \ + COPY->readers++; \ + assert(COPY->readers >= 0); \ + }while(0); \ #define PARSEC_DATA_COPY_DEC_READERS_ATOMIC(COPY) \ do{ \ @@ -25,7 +29,19 @@ assert(COPY->readers >= 0); \ }while(0); \ -#define PARSEC_DATA_COPY_INC_READERS_ATOMIC(COPY) parsec_atomic_fetch_inc_int32( &COPY->readers ); +#define PARSEC_DATA_COPY_INC_READERS_ATOMIC(COPY) \ + do{ \ + parsec_atomic_fetch_inc_int32( &COPY->readers ); \ + assert(COPY->readers >= 0); \ + }while(0); + +#define PARSEC_DATA_COPY_READERS_SET_ZERO(COPY) \ + do{ \ + COPY->readers = 0; \ + assert(COPY->readers == 0); \ + }while(0); + + /** @addtogroup parsec_internal_data * @{ From 020a54d50bb85cc96b091fbcdd82a4494bc1f3f2 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 25 May 2022 08:54:16 +1000 Subject: [PATCH 056/215] protocol updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 49 +++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 0a76a9e96..df6fdc5af 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -446,6 +446,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t //staged in data is already available it data_out task->data[i].data_in =
task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + PARSEC_OBJ_RETAIN(task->data[i].data_in); /** * @brief If the task only has write access remove it from owned LRU and add @@ -455,10 +456,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + assert(task->data[i].data_in->readers > 0); + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } /** * @brief For read_write the flow the readers will already be inceremnetd @@ -467,6 +469,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { + assert(task->data[i].data_in->readers > 0); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); @@ -474,6 +477,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } assert(task->data[i].data_in->original == task->data[i].data_out->original); + assert( task->data[i].data_in->original != NULL); if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) assert( task->data[i].data_out->version == 
task->data[i].data_in->version); assert(task->data[i].data_out != NULL); @@ -483,6 +487,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert( task->data[i].data_out->version == task->data[i].data_in->version); if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) assert(task->data[i].data_out->version == task->data[i].data_in->version); + assert(task->data[i].data_in->device_index == dealer_device->super.device_index); parsec_atomic_unlock( &original->lock ); @@ -515,7 +520,24 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } } - //migrate_hash_table_insert(gpu_task, dealer_device); + return 0; +} + +int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task) +{ + int i; + parsec_task_t *task = gpu_task->ec; + + for(i = 0; i < task->task_class->nb_flows; i++) + { + if (task->data[i].data_in == NULL) + continue; + if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + continue; + + PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); + + } return 0; } @@ -548,24 +570,7 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task) return 0; } -int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task) -{ - int i; - parsec_task_t *task = gpu_task->ec; - - for(i = 0; i < task->task_class->nb_flows; i++) - { - if (task->data[i].data_in == NULL) - continue; - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow - continue; - - PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); - - } - return 0; -} int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device) From f904fab212684d98ced91fc87b41862bfbe1338e Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 25 May 2022 09:25:33 +1000 Subject: [PATCH 057/215] data coherency of staged in data changed from shared to exclusive. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index df6fdc5af..fa4f7b01d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -445,7 +445,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t //staged in data is already available it data_out task->data[i].data_in = task->data[i].data_out; - task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE; PARSEC_OBJ_RETAIN(task->data[i].data_in); /** From 044e164827c6b69ce86f3f43a5ff15b17f22cc9b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 26 May 2022 07:04:23 +1000 Subject: [PATCH 058/215] Readers incremented for each flow of a staged_in task as the reader will be decremented by parsec_gpu_callback_complete_push(), as each transfer in the second stage_in is D2D. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 35 ++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index fa4f7b01d..0e42d2cbb 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -469,12 +469,18 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_in->readers > 0); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + assert(task->data[i].data_in->readers > 0); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } + if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + } assert(task->data[i].data_in->original == task->data[i].data_out->original); assert( task->data[i].data_in->original != NULL); @@ -523,7 +529,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t return 0; } -int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task) +int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device) { int i; parsec_task_t *task = gpu_task->ec; @@ -535,7 +541,24 @@ int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task) if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & 
gpu_task->flow[i]->flow_flags)) //CTL flow continue; - PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); + parsec_atomic_lock( &task->data[i].data_in->original->lock ); + + PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); + //PARSEC_OBJ_RELEASE(task->data[i].data_in); + parsec_device_gpu_module_t *src_device = + (parsec_device_gpu_module_t*)parsec_mca_device_get( task->data[i].data_in->device_index ); + if(0 == task->data[i].data_in->readers) + { + assert( ((parsec_list_item_t*)task->data[i].data_in)->list_next != (parsec_list_item_t*)task->data[i].data_in ); + assert( ((parsec_list_item_t*)task->data[i].data_in)->list_prev != (parsec_list_item_t*)task->data[i].data_in ); + + //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + + } + + parsec_atomic_unlock( &task->data[i].data_in->original->lock ); } From 43aab9993cd3c2d23b0caae0b5c057ba17904c4a Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 26 May 2022 07:09:27 +1000 Subject: [PATCH 059/215] gpu_data_compensate_reader() signature changed. 
--- parsec/mca/device/cuda/device_cuda_migrate.h | 3 ++- parsec/mca/device/cuda/device_cuda_module.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 3014ed102..21218c253 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -95,7 +95,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); -int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task); +int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); + diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 13c3ee7d0..6b854c690 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2218,7 +2218,7 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN ) - gpu_data_compensate_reader(gpu_task); + gpu_data_compensate_reader(gpu_task, gpu_device); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", From fbe0aed623f302be068380c947d59e820a532d6f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 26 May 2022 08:00:10 +1000 Subject: [PATCH 060/215] Reader increment required only for data flow with only write access. gpu_data_compensate_reader not required as reader decrement will be taken care of by parsec_gpu_callback_complete_push after the second stage_in. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 6 +++--- parsec/mca/device/cuda/device_cuda_module.c | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 0e42d2cbb..f7c4630a2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -469,7 +469,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); assert(task->data[i].data_in->readers > 0); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); @@ -479,7 +479,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); } assert(task->data[i].data_in->original == task->data[i].data_out->original); @@ -543,7 +543,7 @@ int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo parsec_atomic_lock( &task->data[i].data_in->original->lock ); - PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); + //PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); //PARSEC_OBJ_RELEASE(task->data[i].data_in); parsec_device_gpu_module_t *src_device = (parsec_device_gpu_module_t*)parsec_mca_device_get( task->data[i].data_in->device_index ); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 6b854c690..1f7652e45 100644 --- 
a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2217,8 +2217,8 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } - if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN ) - gpu_data_compensate_reader(gpu_task, gpu_device); + //if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN ) + // gpu_data_compensate_reader(gpu_task, gpu_device); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", @@ -2386,7 +2386,8 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { - //assert( 0 == gpu_copy->readers ); + //assert( 0 == gpu_copy->readers ); + PARSEC_DATA_COPY_READERS_SET_ZERO(gpu_copy); } } parsec_atomic_unlock(&original->lock); From 1ae43520863ee85ec06bfd81aea6c7cb95e47046 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 27 May 2022 04:23:02 +1000 Subject: [PATCH 061/215] When we are migrating of the data is already in the destination GPU we have to compensate for the reader increment made during the first stage_in --- parsec/mca/device/cuda/device_cuda_module.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 1f7652e45..c76025f89 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1423,6 +1423,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* Do not need to be tranferred */ if( -1 == transfer_from ) { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; + + /** + * When we are migrating of the data is already in the destination + * GPU we have to compensate for the reader increment made during + * the first stage_in + */ + if(gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + 
PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); } else { /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; @@ -2072,7 +2080,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, * to here. This will make sure that tasks update the version * only if it is sure to execute in the corresponding GPU. */ - gpu_data_version_increment(task); + gpu_data_version_increment(task, gpu_device); #if defined(PARSEC_DEBUG_PARANOID) int i; const parsec_flow_t *flow; @@ -2386,8 +2394,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { - //assert( 0 == gpu_copy->readers ); - PARSEC_DATA_COPY_READERS_SET_ZERO(gpu_copy); + assert( 0 == gpu_copy->readers ); } } parsec_atomic_unlock(&original->lock); @@ -2480,7 +2487,7 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; - //assert( 0 == gpu_copy->readers ); + assert( 0 == gpu_copy->readers ); if( gpu_task->pushout & (1 << i) ) { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, @@ -2797,8 +2804,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec = NULL; goto remove_gpu_task; } - //if( gpu_task->migrate_status > TASK_NOT_MIGRATED ) - // migrate_hash_table_delete(gpu_task); parsec_cuda_kernel_epilog( gpu_device, gpu_task ); __parsec_complete_execution( es, gpu_task->ec ); From 23011598c049a23167179fad963ceca4eb6e96a3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 04:26:46 +1000 Subject: [PATCH 062/215] gpu_data_version_increment() signature updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index f7c4630a2..1fdd11ddf 100644 
--- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -565,7 +565,7 @@ int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo return 0; } -int gpu_data_version_increment(parsec_gpu_task_t *gpu_task) +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device) { int i; parsec_task_t *task = gpu_task->ec; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 21218c253..f8a48c54c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -92,7 +92,7 @@ int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* sr parsec_device_gpu_module_t* dest_dev); int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, int stage_in_status); -int gpu_data_version_increment(parsec_gpu_task_t *gpu_task); +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); From 8f79706846ef576faba4e1a9255241f6b786b6f8 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 04:28:49 +1000 Subject: [PATCH 063/215] Coherence changed to PARSEC_DATA_COHERENCY_SHARED and the data to be migrated not added to any new LRU list. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1fdd11ddf..1586100f4 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -445,7 +445,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t //staged in data is already available it data_out task->data[i].data_in = task->data[i].data_out; - task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE; + task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; PARSEC_OBJ_RETAIN(task->data[i].data_in); /** @@ -458,9 +458,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { assert(task->data[i].data_in->readers > 0); PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } /** * @brief For read_write the flow the readers will already be inceremnetd @@ -471,9 +471,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); assert(task->data[i].data_in->readers > 0); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + 
//parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && From a4f37d6cf3b3897ad49471ef414da7792b342fe6 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 04:39:05 +1000 Subject: [PATCH 064/215] Task once migrated are not migrated again --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1586100f4..50ec557ff 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -369,7 +369,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module * @brief if the GPU task is a not a computational kerenel * stop migration. 
*/ - if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL) + if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { if(execution_level == 0) { From 81dfdcbab6e8b2327d3874b7a343492ed2513a8d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 05:32:03 +1000 Subject: [PATCH 065/215] LRU push corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 50ec557ff..bd75b7c56 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -458,9 +458,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { assert(task->data[i].data_in->readers > 0); PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); } /** * @brief For read_write the flow the readers will already be inceremnetd @@ -471,9 +471,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); assert(task->data[i].data_in->readers > 0); - //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + 
parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); } if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && From 32e253fc2bc89de6ef86db42ddf295b1434e634b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 05:43:32 +1000 Subject: [PATCH 066/215] level1 conditions updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 43 +++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index bd75b7c56..c05c0cde9 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -325,7 +325,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return 0; starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - #if 0 + //#if 0 migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( &(dealer_device->pending) ); //level 0 execution_level = 0; if(migrated_gpu_task == NULL) @@ -346,18 +346,18 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module } } } - #endif - - for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) - { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 - if(migrated_gpu_task != NULL) - { - execution_level = 2; - stream_index = 2 + j; - break; - } - } + //#endif + + //for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) + //{ + // migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 + // if(migrated_gpu_task != NULL) + // { + // execution_level = 2; + // stream_index = 2 + j; + // break; + // } + //} if(migrated_gpu_task 
!= NULL) @@ -507,20 +507,23 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t */ if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) continue; - if( task->data[i].data_in->version == task->data[i].data_out->version) - continue; - - task->data[i].data_in = task->data[i].data_out; + //if( task->data[i].data_in->version == task->data[i].data_out->version) + // continue; + // + //task->data[i].data_in = task->data[i].data_out; + + + if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) + assert( task->data[i].data_out->version == task->data[i].data_out->original->device_copies[0]->version); parsec_data_t* original = task->data[i].data_in->original; parsec_atomic_lock( &original->lock ); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); assert(task->data[i].data_in->readers >= 0); task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - task->data[i].data_in->data_transfer_status = PARSEC_DATA_STATUS_SHOULD_MIGRATE; parsec_atomic_unlock( &original->lock ); } From 4e9109750ecc643a784d9293e18f753143889509 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 May 2022 06:30:04 +1000 Subject: [PATCH 067/215] updated documentation and minor code changes --- parsec/mca/device/cuda/device_cuda_migrate.c | 165 ++++++++----------- parsec/mca/device/cuda/device_cuda_migrate.h | 2 +- parsec/mca/device/cuda/device_cuda_module.c | 36 ++-- 3 files changed, 85 insertions(+), 118 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c05c0cde9..c0d63fcaf 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -278,8 +278,7 @@ int 
parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) /** - * This function migrate a specific task from a device a - * to another. + * This function enqueues the migrated task to a node level queue. * * Returns: negative number if any error occured. * positive: starving device index. @@ -290,7 +289,6 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; parsec_device_gpu_module_t* starving_device = mig_task->starving_device; - char tmp[MAX_TASK_STRLEN]; PARSEC_DEBUG_VERBOSE(10, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); @@ -325,7 +323,10 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return 0; starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - //#if 0 + /** + * @brief Tasks are searched in different levels one by one. At this point we assume + * that the cost of migration increases, as the level increase. 
+ */ migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( &(dealer_device->pending) ); //level 0 execution_level = 0; if(migrated_gpu_task == NULL) @@ -346,18 +347,6 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module } } } - //#endif - - //for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) - //{ - // migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 - // if(migrated_gpu_task != NULL) - // { - // execution_level = 2; - // stream_index = 2 + j; - // break; - // } - //} if(migrated_gpu_task != NULL) @@ -366,8 +355,8 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); /** - * @brief if the GPU task is a not a computational kerenel - * stop migration. + * @brief if the task is a not a computational kerenel or if it is a task that has + * already been migrated, we stop the migration. */ if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { @@ -398,12 +387,22 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module nb_migrated++; parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device - //change migrate_status + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. + */ if( execution_level == 2 ) migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; else migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + /** + * @brief An object of type migrated_task_t is created store the migrated task + * and other associated details. This object is enqueued to a node level queue. 
+ * The main objective of this was to make sure that the manager does not have to sepend + * time on migration. It can select the task for migration, enqqueue it to the node level + * queue and then return to its normal working. + */ mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; @@ -424,6 +423,16 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module return nb_migrated; } +/** + * @brief This function changes the features of a task, in a way that it is preped + * for migration. + * + * @param gpu_task + * @param dealer_device + * @param stage_in_status + * @return int + */ + int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, int stage_in_status) { @@ -438,50 +447,59 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow continue; - if( stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN ) + /** + * Data is already staged in the dealer device and we can find all the data + * of the tasks to be migrated in the dealer device. + */ + if( stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN ) { parsec_data_t* original = task->data[i].data_out->original; parsec_atomic_lock( &original->lock ); - //staged in data is already available it data_out + /** + * @brief If the task is stage in the data is already available in the + * dealer GPU. So we can set data_in = data_out, in order to make sure + * that the source data for the second stage in is always selected as the + * data in the delaer GPU. + */ task->data[i].data_in = task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; PARSEC_OBJ_RETAIN(task->data[i].data_in); /** - * @brief If the task only has write access remove it from owned LRU and add - * it to mem LRU. 
Increment the reader to make usre the data will - * not be evicted. + * @brief If the task only WRITE access, then we have to increment the + * reader of the data_in, so that it does not go into negative value + * when we call complete_stage( parsec_gpu_callback_complete_push() ) + * after the second stage in of the task is completed (on the starving device). + * + * If the task only as READ access it is already in the gpu_mem_owned_lru of + * the dealer device. If it has WRITE and READ-WRITE access we move the data + * to gpu_mem_owned_lru. */ + if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + assert( task->data[i].data_in->readers > 0 ); + } if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_in->readers > 0); + assert(task->data[i].data_in->readers >= 0); PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); } - /** - * @brief For read_write the flow the readers will already be inceremnetd - * but, the data will be in the owned LRU move it to the mem LRU - */ if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); assert(task->data[i].data_in->readers > 0); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); } - if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) - { - 
//PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - } - + assert(task->data[i].data_in->original == task->data[i].data_out->original); assert( task->data[i].data_in->original != NULL); if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) @@ -499,74 +517,29 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t } + /** + * Data is not yet staged in the dealer device, but some of the data we need maybe + * already in the delaer device and the dealer device may have the latest data. + * In that case we set data_in = data_out. + */ else { - /** - * The data GPU will be the owner of the data, only if the task executing on that GPU - * has a write permission on it. (data.c line 423) - */ - if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) - continue; - //if( task->data[i].data_in->version == task->data[i].data_out->version) - // continue; - // - //task->data[i].data_in = task->data[i].data_out; - - - if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) - assert( task->data[i].data_out->version == task->data[i].data_out->original->device_copies[0]->version); - parsec_data_t* original = task->data[i].data_in->original; - parsec_atomic_lock( &original->lock ); - - //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - - assert(task->data[i].data_in->readers >= 0); - - task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - - parsec_atomic_unlock( &original->lock ); + if(task->data[i].data_out->original->owner_device == dealer_device->super.device_index && + (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version) ) + { + parsec_data_t* original = task->data[i].data_out->original; + parsec_atomic_lock( &original->lock ); + task->data[i].data_in = task->data[i].data_out; + task->data[i].data_in->coherency_state = 
PARSEC_DATA_COHERENCY_SHARED; + PARSEC_OBJ_RETAIN(task->data[i].data_in); + parsec_atomic_unlock( &original->lock ); + } } } return 0; } -int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device) -{ - int i; - parsec_task_t *task = gpu_task->ec; - - for(i = 0; i < task->task_class->nb_flows; i++) - { - if (task->data[i].data_in == NULL) - continue; - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow - continue; - - parsec_atomic_lock( &task->data[i].data_in->original->lock ); - - //PARSEC_DATA_COPY_DEC_READERS_ATOMIC( task->data[i].data_in ); - //PARSEC_OBJ_RELEASE(task->data[i].data_in); - parsec_device_gpu_module_t *src_device = - (parsec_device_gpu_module_t*)parsec_mca_device_get( task->data[i].data_in->device_index ); - if(0 == task->data[i].data_in->readers) - { - assert( ((parsec_list_item_t*)task->data[i].data_in)->list_next != (parsec_list_item_t*)task->data[i].data_in ); - assert( ((parsec_list_item_t*)task->data[i].data_in)->list_prev != (parsec_list_item_t*)task->data[i].data_in ); - - //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); - - } - - parsec_atomic_unlock( &task->data[i].data_in->original->lock ); - - } - - return 0; -} int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device) { diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index f8a48c54c..6b33a7618 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -95,7 +95,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); int 
migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); -int gpu_data_compensate_reader(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); + diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index c76025f89..439467d3f 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1300,19 +1300,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, */ if( PARSEC_FLOW_ACCESS_WRITE & type ) { if (gpu_elem->readers > 0 ) { - //we are migrating the data of task that has already been staged in. - // So we have incremented the reader for this data in change_task_features(). - //if(gpu_task->migrate_status > TASK_NOT_MIGRATED) - //{ - // if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { - // parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " - // "(possible anti-dependency,\n" - // "or concurrent accesses), please prevent that with CTL dependencies\n", - // gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); - // parsec_atomic_unlock( &original->lock ); - // return -1; - // } - //} + if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { + parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " + "(possible anti-dependency,\n" + "or concurrent accesses), please prevent that with CTL dependencies\n", + gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); + parsec_atomic_unlock( &original->lock ); + return -1; + } } PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tDetach writable CUDA copy %p [ref_count %d] from any lists", @@ -1326,10 +1321,8 @@ parsec_gpu_data_stage_in( 
parsec_device_cuda_module_t* cuda_device, * Current limitations: only for read-only data used read-only on the hosting GPU. */ parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) - || (gpu_task->migrate_status > TASK_NOT_MIGRATED)) + || (gpu_task->migrate_status > TASK_NOT_MIGRATED) /* make sure limitation does not affect migrated tasks */) { - parsec_data_status_t old_status = in_elem->data_transfer_status; - int potential_alt_src = 0; if( PARSEC_DEV_CUDA == in_elem_dev->super.super.type ) { if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { @@ -2078,7 +2071,8 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, /** * The change in data versioning is moved from stage_in step * to here. This will make sure that tasks update the version - * only if it is sure to execute in the corresponding GPU. + * only if it is sure to execute in the corresponding GPU, in + * the last moment. */ gpu_data_version_increment(task, gpu_device); #if defined(PARSEC_DEBUG_PARANOID) @@ -2225,9 +2219,6 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } - //if( gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN ) - // gpu_data_compensate_reader(gpu_task, gpu_device); - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", gpu_device->super.name, @@ -2758,8 +2749,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * @brief Before a new task is selectd by the device manager for execution, * the manager checks if there are any starving devices and migrate tasks, * to the starving device, if there are available tasks to migrate. + * + * rc will return the total number of tasks selected for migration and that + * is deducted from the total number of tasks that will be executed by this + * GPU. 
*/ - //printf("Available tasks %d \n", gpu_device->mutex); rc = migrate_if_starving(es, gpu_device); if( rc > 0) parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); From ec4dce7588e7b98ea6ff666803ff4b3fc8f04c8f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 29 May 2022 00:35:37 +1000 Subject: [PATCH 068/215] Increment reader to compensate for parsec_gpu_callback_complete_push() --- parsec/mca/device/cuda/device_cuda_migrate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c0d63fcaf..3f0daff93 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -531,6 +531,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_lock( &original->lock ); task->data[i].data_in = task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); PARSEC_OBJ_RETAIN(task->data[i].data_in); parsec_atomic_unlock( &original->lock ); } From 4e44a0fb46085852cccce2615be28446e8fd03f5 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 29 May 2022 01:02:04 +1000 Subject: [PATCH 069/215] Code correction: assert disabled and restored parsec_get_best_device() --- parsec/mca/device/cuda/device_cuda_module.c | 2 +- parsec/mca/device/device.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 439467d3f..2e0f17421 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -796,7 +796,7 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de * before we get here (aka below parsec_fini), the destructor of the data * collection must have been called, releasing all the copies. 
*/ - PARSEC_OBJ_RELEASE(gpu_copy); assert(NULL == gpu_copy); + PARSEC_OBJ_RELEASE(gpu_copy); //assert(NULL == gpu_copy); } } diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index dccb8fce0..58e6fcfa7 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -168,7 +168,7 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } - dev_index = 2; //CHANGE THIS, only for testing all task mapped to first GPU device + //dev_index = 2; //CHANGE THIS, only for testing all task mapped to first GPU device return dev_index; } From e34165e39a546eed87ab730a8bb128acd69c9a98 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 31 May 2022 05:15:41 +1000 Subject: [PATCH 070/215] MPI_Wtime() added --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3f0daff93..ed267031f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -7,6 +7,8 @@ static int NDEVICES; migration_accounting_t* accounting; static parsec_hash_table_t *migrated_data_hash_table = NULL; +double start = 0; +double end = 0; PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); @@ -26,6 +28,8 @@ int parsec_cuda_migrate_init(int ndevices) nvmlReturn_t nvml_ret; #endif + start = MPI_Wtime(); + NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); accounting = (migration_accounting_t *) calloc(ndevices, sizeof(migration_accounting_t)); @@ -69,6 +73,8 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini() { int i; + + end = MPI_Wtime(); #if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); @@ -83,6 +89,7 @@ int parsec_cuda_migrate_fini() accounting[i].level0 + accounting[i].level1 + 
accounting[i].level2); printf("Task received %d \n", accounting[i].received); } + printf("---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -333,6 +340,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module { migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( dealer_device->exec_stream[0]->fifo_pending ); //level 1 execution_level = 1; + if( migrated_gpu_task == NULL) { for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) From 882aaec82431c5f83734178407eaed5d5fdb88d4 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 1 Jun 2022 00:16:24 +1000 Subject: [PATCH 071/215] timer added. Also changes to parsec_cuda_get_device_task() --- parsec/mca/device/cuda/device_cuda_migrate.c | 11 +++++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 1 + parsec/mca/device/cuda/device_cuda_module.c | 8 ++++++++ 3 files changed, 20 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ed267031f..2923223e5 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -101,6 +101,12 @@ int parsec_cuda_migrate_fini() } + +double current_time() +{ + return ( MPI_Wtime() - start); +} + /** * @brief returns the load of a particular device * @@ -157,6 +163,11 @@ int parsec_cuda_set_device_load(int device, int load) int parsec_cuda_get_device_task(int device, int level) { + if( level == 3) + return (device_info[device].task_count[0] + + device_info[device].task_count[1] + + device_info[device].task_count[2]); + return device_info[device].task_count[level]; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 6b33a7618..45b56e9d3 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -95,6 +95,7 @@ int 
change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); +double current_time(); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 2e0f17421..b025faaf9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2745,6 +2745,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, fetch_task_from_shared_queue: + printf(" time %lf device %d level0 %d level1 %d level2 %d total %d \n", + current_time(), + CUDA_DEVICE_NUM(gpu_device->super.device_index), + parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), + parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), + parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), + parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 3)); + /** * @brief Before a new task is selectd by the device manager for execution, * the manager checks if there are any starving devices and migrate tasks, From c62ad2f327c909963b4a886a0196faf618ccf18b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 1 Jun 2022 01:49:42 +1000 Subject: [PATCH 072/215] counting corrected to accomodate different levels --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++++++ parsec/mca/device/cuda/device_cuda_module.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 2923223e5..f06374c87 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -87,7 +87,13 
@@ int parsec_cuda_migrate_fini() printf("Tasks migrated: level0 %d, level1 %d, level2 %d (Total %d)\n", accounting[i].level0, accounting[i].level1, accounting[i].level2, accounting[i].level0 + accounting[i].level1 + accounting[i].level2); + printf("Task check: level0 %d level1 %d level2 %d total %d \n", + parsec_cuda_get_device_task(i, 0), + parsec_cuda_get_device_task(i, 1), + parsec_cuda_get_device_task(i, 2), + parsec_cuda_get_device_task(i, 3)); printf("Task received %d \n", accounting[i].received); + } printf("---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index b025faaf9..8a1a7c855 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2013,6 +2013,18 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if( NULL != task ) { PARSEC_PUSH_TASK(stream->fifo_pending, (parsec_list_item_t*)task); task = NULL; + + if(stream == gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); + } + else if(stream != gpu_device->exec_stream[1] && stream != gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); + } + } *out_task = NULL; progress_fct = upstream_progress_fct; @@ -2811,7 +2823,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, __parsec_complete_execution( es, gpu_task->ec ); gpu_device->super.executed_tasks++; - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); + 
parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); remove_gpu_task: From 8e6e805e33c466802abeb0bf1cfc949c8d53e326 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 1 Jun 2022 03:31:44 +1000 Subject: [PATCH 073/215] task migrated based on the total tasks in each level. will_starve() used to check if migratioin will result in dealer starvation. --- parsec/mca/device/cuda/device_cuda_migrate.c | 18 ++++++++++++++---- parsec/mca/device/cuda/device_cuda_module.c | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index f06374c87..3c40aa59c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -91,7 +91,7 @@ int parsec_cuda_migrate_fini() parsec_cuda_get_device_task(i, 0), parsec_cuda_get_device_task(i, 1), parsec_cuda_get_device_task(i, 2), - parsec_cuda_get_device_task(i, 3)); + parsec_cuda_get_device_task(i, -1)); printf("Task received %d \n", accounting[i].received); } @@ -169,7 +169,7 @@ int parsec_cuda_set_device_load(int device, int load) int parsec_cuda_get_device_task(int device, int level) { - if( level == 3) + if( level == -1) return (device_info[device].task_count[0] + device_info[device].task_count[1] + device_info[device].task_count[2]); @@ -216,8 +216,16 @@ int parsec_cuda_tasks_executed(int device) */ int is_starving(int device) { - //if( device_info[device].load < 1 && device_info[device].task_count < 1 ) - if( device_info[device].task_count[/* level */ 0] < 1 ) + if( parsec_cuda_get_device_task(device, -1) < 1 ) + return 1; + else + return 0; +} + + +int will_starve(int device) +{ + if( parsec_cuda_get_device_task(device, -1) < 3) return 1; else return 0; @@ -236,6 +244,8 @@ int is_starving(int device) int find_starving_device(int dealer_device) { int 
i; + if( will_starve(dealer_device) ) + return -1; for(i = 0; i < NDEVICES; i++) { diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 8a1a7c855..a7e74a043 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2763,7 +2763,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), - parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 3)); + parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); /** * @brief Before a new task is selectd by the device manager for execution, From ece493e6525d17a92fd0be680f63450cb0593ad7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 1 Jun 2022 04:27:15 +1000 Subject: [PATCH 074/215] task count corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 26 +++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3c40aa59c..6a5f011ff 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -82,20 +82,20 @@ int parsec_cuda_migrate_fini() for(i = 0; i < NDEVICES; i++) { - printf("*********** DEVICE %d *********** \n", i); + printf("\n*********** DEVICE %d *********** \n", i); printf("Total tasks executed: %d \n", accounting[i].total_tasks_executed); - printf("Tasks migrated: level0 %d, level1 %d, level2 %d (Total %d)\n", + printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", accounting[i].level0, accounting[i].level1, accounting[i].level2, accounting[i].level0 + accounting[i].level1 + accounting[i].level2); - printf("Task 
check: level0 %d level1 %d level2 %d total %d \n", + printf("Task check : level0 %d level1 %d level2 %d total %d \n", parsec_cuda_get_device_task(i, 0), parsec_cuda_get_device_task(i, 1), parsec_cuda_get_device_task(i, 2), parsec_cuda_get_device_task(i, -1)); - printf("Task received %d \n", accounting[i].received); + printf("Task received : %d \n", accounting[i].received); } - printf("---------Execution time = %lf ------------ \n", end - start); + printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -414,13 +414,21 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module assert( (migrated_gpu_task != NULL) && (migrated_gpu_task->ec != NULL) ); if(execution_level == 0) - accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level0++; + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + accounting[dealer_device_index].level0++; + } if(execution_level == 1) - accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level1++; + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + accounting[dealer_device_index].level1++; + } if(execution_level == 2) - accounting[CUDA_DEVICE_NUM(dealer_device->super.device_index)].level2++; + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + accounting[dealer_device_index].level2++; + } nb_migrated++; - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); // decrement task count at the dealer device /** * @brief change migrate_status according to the status of the stage in of the From dd6d7d470fa402edc0bc8e97004a70228739eb25 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 1 Jun 2022 08:46:50 +1000 Subject: [PATCH 075/215] parsec_atomic_fetch_add_int32() changed to parsec_atomic_cas_int32() deadlock. 
--- parsec/mca/device/cuda/device_cuda_module.c | 30 +++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index a7e74a043..2ad4f8b8b 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2601,7 +2601,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device; parsec_device_cuda_module_t *cuda_device; cudaError_t status; - int rc, exec_stream = 0; + int rc, exec_stream = 0, nb_migrated = 0; parsec_gpu_task_t *progress_task, *out_task_submit = NULL, *out_task_pop = NULL; #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; @@ -2757,13 +2757,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, fetch_task_from_shared_queue: - printf(" time %lf device %d level0 %d level1 %d level2 %d total %d \n", - current_time(), - CUDA_DEVICE_NUM(gpu_device->super.device_index), - parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), - parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), - parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), - parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); + //printf(" time %lf device %d level0 %d level1 %d level2 %d total %d \n", + // current_time(), + // CUDA_DEVICE_NUM(gpu_device->super.device_index), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); /** * @brief Before a new task is selectd by the device manager for execution, @@ -2774,9 +2774,17 @@ parsec_cuda_kernel_scheduler( 
parsec_execution_stream_t *es, * is deducted from the total number of tasks that will be executed by this * GPU. */ - rc = migrate_if_starving(es, gpu_device); - if( rc > 0) - parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * rc); + nb_migrated = migrate_if_starving(es, gpu_device); + if( nb_migrated > 0 ) + { + while(1) + { + rc = gpu_device->mutex; + if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc - nb_migrated ) ) + break; + } + } + assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { From 57c64c0d3117ddcccf1ccd69ff6a6b9535e0c596 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 2 Jun 2022 02:01:07 +1000 Subject: [PATCH 076/215] code cleanup --- parsec/mca/device/cuda/device_cuda_migrate.c | 138 +++---------------- parsec/mca/device/cuda/device_cuda_migrate.h | 33 +---- parsec/mca/device/cuda/device_cuda_module.c | 3 - 3 files changed, 25 insertions(+), 149 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 6a5f011ff..3baa4d125 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,8 +4,6 @@ extern int parsec_device_cuda_enabled; parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; -migration_accounting_t* accounting; -static parsec_hash_table_t *migrated_data_hash_table = NULL; double start = 0; double end = 0; @@ -32,7 +30,6 @@ int parsec_cuda_migrate_init(int ndevices) NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); - accounting = (migration_accounting_t *) calloc(ndevices, sizeof(migration_accounting_t)); migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t);; for(i = 0; i < NDEVICES; i++) @@ -41,23 +38,17 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].task_count[j] = 0; device_info[i].load = 0; - accounting[i].level0 
= 0; - accounting[i].level1 = 0; - accounting[i].level2 = 0; - accounting[i].total_tasks_executed = 0; - accounting[i].received = 0; + device_info[i].level0 = 0; + device_info[i].level1 = 0; + device_info[i].level2 = 0; + device_info[i].total_tasks_executed = 0; + device_info[i].received = 0; } #if defined(PARSEC_HAVE_CUDA) nvml_ret = nvmlInit_v2(); #endif - migrated_data_hash_table = PARSEC_OBJ_NEW(parsec_hash_table_t); - parsec_hash_table_init(migrated_data_hash_table, - offsetof(migrated_data_t, ht_item), - 8, migrated_data_key_fns, NULL); - - char hostname[256]; gethostname(hostname, sizeof(hostname)); printf("PID %d on %s ready for attach\n", getpid(), hostname); @@ -83,24 +74,22 @@ int parsec_cuda_migrate_fini() for(i = 0; i < NDEVICES; i++) { printf("\n*********** DEVICE %d *********** \n", i); - printf("Total tasks executed: %d \n", accounting[i].total_tasks_executed); + printf("Total tasks executed: %d \n", device_info[i].total_tasks_executed); printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", - accounting[i].level0, accounting[i].level1, accounting[i].level2, - accounting[i].level0 + accounting[i].level1 + accounting[i].level2); + device_info[i].level0, device_info[i].level1, device_info[i].level2, + device_info[i].level0 + device_info[i].level1 + device_info[i].level2); printf("Task check : level0 %d level1 %d level2 %d total %d \n", parsec_cuda_get_device_task(i, 0), parsec_cuda_get_device_task(i, 1), parsec_cuda_get_device_task(i, 2), parsec_cuda_get_device_task(i, -1)); - printf("Task received : %d \n", accounting[i].received); + printf("Task received : %d \n", device_info[i].received); } printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); - parsec_hash_table_fini(migrated_data_hash_table); - printf("Migration module shut down \n"); return 0; @@ -202,7 +191,7 @@ int parsec_cuda_set_device_task(int device, int task_count, int level) int 
parsec_cuda_tasks_executed(int device) { - int rc = parsec_atomic_fetch_add_int32(&(accounting[device].total_tasks_executed), 1); + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].total_tasks_executed), 1); return rc + 1; } @@ -244,8 +233,6 @@ int will_starve(int device) int find_starving_device(int dealer_device) { int i; - if( will_starve(dealer_device) ) - return -1; for(i = 0; i < NDEVICES; i++) { @@ -298,7 +285,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) change_task_features(migrated_gpu_task, dealer_device, stage_in_status); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); - parsec_atomic_fetch_inc_int32(&accounting[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); + parsec_atomic_fetch_inc_int32(&device_info[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, starving_device->super.device_index); PARSEC_OBJ_DESTRUCT(mig_task); free(mig_task); @@ -349,7 +336,7 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module migrated_task_t *mig_task = NULL; dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if(is_starving(dealer_device_index)) + if( will_starve(dealer_device_index) ) return 0; starving_device_index = find_starving_device(dealer_device_index); @@ -361,11 +348,11 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module * @brief Tasks are searched in different levels one by one. At this point we assume * that the cost of migration increases, as the level increase. 
*/ - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( &(dealer_device->pending) ); //level 0 + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_back( &(dealer_device->pending) ); //level 0 execution_level = 0; if(migrated_gpu_task == NULL) { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front( dealer_device->exec_stream[0]->fifo_pending ); //level 1 + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 execution_level = 1; if( migrated_gpu_task == NULL) @@ -396,18 +383,12 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { if(execution_level == 0) - { - parsec_list_push_front(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task ); - } + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task ); if(execution_level == 1) - { - parsec_list_push_front( dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); - } + parsec_list_push_back( dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); if(execution_level == 2) - { - parsec_list_push_front( dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); - } - + parsec_list_push_back( dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); + return nb_migrated; } @@ -416,17 +397,17 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module if(execution_level == 0) { parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - accounting[dealer_device_index].level0++; + device_info[dealer_device_index].level0++; } if(execution_level == 1) { parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - 
accounting[dealer_device_index].level1++; + device_info[dealer_device_index].level1++; } if(execution_level == 2) { parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - accounting[dealer_device_index].level2++; + device_info[dealer_device_index].level2++; } nb_migrated++; @@ -614,80 +595,3 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo } - - -int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device) -{ - int i; - migrated_data_t *migrated_data_item = NULL; - parsec_task_t *task = migrated_gpu_task->ec; - migrated_data_item = (migrated_data_t *) calloc(1, sizeof(migrated_data_t)); - migrated_data_item->dealer_device = dealer_device; - migrated_data_item->ht_item.key = (parsec_key_t) task->task_class->make_key((const parsec_taskpool_t*)task->taskpool, - (const parsec_assignment_t*)&task->locals); - for( i = 0; i < task->task_class->nb_flows; i++) - { - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & migrated_gpu_task->flow[i]->flow_flags)) //CTL flow - { - migrated_data_item->old_copy[i] = NULL; - continue; - } - - if(task->data->data_out == NULL) - migrated_data_item->old_copy[i] = NULL; - else - migrated_data_item->old_copy[i] = task->data->data_out; - } - - parsec_hash_table_lock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); - parsec_hash_table_nolock_insert(migrated_data_hash_table, &migrated_data_item->ht_item); - parsec_hash_table_unlock_bucket(migrated_data_hash_table, migrated_data_item->ht_item.key); - - return 1; -} - -int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task) -{ - int i; - migrated_data_t* migrated_data_item = NULL; - parsec_task_t* task = migrated_gpu_task->ec; - parsec_key_t key; - - - key = (parsec_key_t) migrated_gpu_task->ec->task_class->make_key((const parsec_taskpool_t*)migrated_gpu_task->ec->taskpool, - (const parsec_assignment_t*)&migrated_gpu_task->ec->locals); - - 
parsec_hash_table_lock_bucket(migrated_data_hash_table, key); - migrated_data_item = (migrated_data_t*) parsec_hash_table_nolock_remove(migrated_data_hash_table, key); - parsec_hash_table_unlock_bucket(migrated_data_hash_table, key); - - if( migrated_data_item != NULL) - { - if( migrated_gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - { - for( i = 0; i < task->task_class->nb_flows; i++) - { - if(migrated_data_item->old_copy[i] == NULL) - continue; - - parsec_data_t* original = migrated_data_item->old_copy[i]->original; - parsec_atomic_lock( &original->lock ); - - if( (PARSEC_FLOW_ACCESS_READ & migrated_gpu_task->flow[i]->flow_flags) ) - { - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(migrated_data_item->old_copy[i]); - } - - parsec_list_push_back(&migrated_data_item->dealer_device->gpu_mem_lru, - (parsec_list_item_t*)migrated_data_item->old_copy[i]); - - parsec_atomic_unlock( &original->lock ); - } - } - - free(migrated_data_item); - } - - - return 1; -} diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 45b56e9d3..8e079b105 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -14,10 +14,6 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) -#define PARSEC_DATA_STATUS_SHOULD_MIGRATE ((parsec_data_coherency_t)0x3) -#define PARSEC_DATA_STATUS_UNDER_MIGRATION ((parsec_data_coherency_t)0x4) -#define PARSEC_DATA_STATUS_MIGRATION_COMPLETE ((parsec_data_coherency_t)0x5) - #define TASK_NOT_MIGRATED 0 #define TASK_MIGRATED_BEFORE_STAGE_IN 1 #define TASK_MIGRATED_AFTER_STAGE_IN 2 @@ -37,6 +33,10 @@ typedef struct parsec_device_cuda_info_s int total_tasks_executed; int task_count[EXECUTION_LEVEL]; int load; + int level0; + int level1; + int level2; + int received; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -48,29 +48,6 @@ typedef struct migrated_task_s int stage_in_status; } 
migrated_task_t; -typedef struct migration_accounting_s -{ - int total_tasks_executed; - int level0; - int level1; - int level2; - int received; -} migration_accounting_t; - -typedef struct migrated_data_s -{ - parsec_hash_table_item_t ht_item; - parsec_device_gpu_module_t* dealer_device; - parsec_data_copy_t *old_copy[MAX_PARAM_COUNT]; -} migrated_data_t; - -static parsec_key_fn_t migrated_data_key_fns = { - .key_equal = parsec_hash_table_generic_64bits_key_equal, - .key_print = parsec_hash_table_generic_64bits_key_print, - .key_hash = parsec_hash_table_generic_64bits_key_hash -}; - - int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); @@ -93,8 +70,6 @@ int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* sr int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); -int migrate_hash_table_insert( parsec_gpu_task_t *migrated_gpu_task, parsec_device_gpu_module_t* dealer_device ); -int migrate_hash_table_delete( parsec_gpu_task_t *migrated_gpu_task); double current_time(); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 2ad4f8b8b..33f966a79 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2595,9 +2595,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_gpu_task_t *gpu_task, int which_gpu ) { - //which_gpu = 2; //CHANGE THIS, only for testing. 
device 0 is cpu, device 1 is recursive, device 2 is the first cuda gpu - //printf("Which_gpu = %d \n", which_gpu-2); - parsec_device_gpu_module_t* gpu_device; parsec_device_cuda_module_t *cuda_device; cudaError_t status; From 81c752b4e2039cf07d4c61b3345b8f80444c0531 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 2 Jun 2022 07:41:11 +1000 Subject: [PATCH 077/215] special case, where migration may push the task count to 0. --- parsec/mca/device/cuda/device_cuda_migrate.c | 11 +++-------- parsec/mca/device/cuda/device_cuda_module.c | 12 ++++++++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3baa4d125..857da46da 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -205,19 +205,14 @@ int parsec_cuda_tasks_executed(int device) */ int is_starving(int device) { - if( parsec_cuda_get_device_task(device, -1) < 1 ) - return 1; - else - return 0; + return ( parsec_cuda_get_device_task(device, -1) < 1 ) ? 1 : 0; + } int will_starve(int device) { - if( parsec_cuda_get_device_task(device, -1) < 3) - return 1; - else - return 0; + return ( parsec_cuda_get_device_task(device, -1) < 3 ) ? 
1 : 0; } /** diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 33f966a79..4f1e1e537 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2777,8 +2777,15 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, while(1) { rc = gpu_device->mutex; - if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc - nb_migrated ) ) - break; + if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc - nb_migrated ) ) { + ///* update the expected load on the GPU device */ + parsec_device_load[gpu_device->super.device_index] -= nb_migrated * parsec_device_sweight[gpu_device->super.device_index]; + if( gpu_device->mutex == 0) { + rc = 1; + goto crappy_code; + } + break; + } } } @@ -2838,6 +2845,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task, __FILE__, __LINE__); free( gpu_task ); rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); +crappy_code: if( 1 == rc ) { /* I was the last one */ #if defined(PARSEC_PROF_TRACE) if( parsec_gpu_trackable_events & PARSEC_PROFILE_GPU_TRACK_OWN ) From 96fa9559a0faf70fa33fcbcc8d154171f6999176 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 2 Jun 2022 11:26:53 +1000 Subject: [PATCH 078/215] parsec_list_pop_back() changed to parsec_list_try_pop_back in migration. Special case changed from parsec_atomic_cas_int32() to parsec_atomic_fetch_add_int32(). 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 10 ++++++---- parsec/mca/device/cuda/device_cuda_migrate.h | 2 +- parsec/mca/device/cuda/device_cuda_module.c | 19 ++++--------------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 857da46da..390c583b2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -266,7 +266,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) int stage_in_status = 0; - mig_task = (migrated_task_t*) parsec_list_pop_front(migrated_task_list); + mig_task = (migrated_task_t*) parsec_list_try_pop_front(migrated_task_list); if(mig_task != NULL) { @@ -322,7 +322,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t * @return int */ -int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) +int migrate_to_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0; @@ -343,11 +343,11 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module * @brief Tasks are searched in different levels one by one. At this point we assume * that the cost of migration increases, as the level increase. 
*/ - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_back( &(dealer_device->pending) ); //level 0 + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( &(dealer_device->pending) ); //level 0 execution_level = 0; if(migrated_gpu_task == NULL) { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 execution_level = 1; if( migrated_gpu_task == NULL) @@ -439,6 +439,8 @@ int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module } migrated_gpu_task = NULL; + ///* update the expected load on the GPU device */ + parsec_device_load[dealer_device->super.device_index] -= nb_migrated * parsec_device_sweight[dealer_device->super.device_index]; return nb_migrated; } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 8e079b105..eedaaf17f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -62,7 +62,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es); int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, parsec_gpu_task_t* migrated_gpu_task); -int migrate_if_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); +int migrate_to_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 
4f1e1e537..3d5c1cf7e 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2771,25 +2771,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * is deducted from the total number of tasks that will be executed by this * GPU. */ - nb_migrated = migrate_if_starving(es, gpu_device); + nb_migrated = migrate_to_starving(es, gpu_device); if( nb_migrated > 0 ) { - while(1) - { - rc = gpu_device->mutex; - if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc - nb_migrated ) ) { - ///* update the expected load on the GPU device */ - parsec_device_load[gpu_device->super.device_index] -= nb_migrated * parsec_device_sweight[gpu_device->super.device_index]; - if( gpu_device->mutex == 0) { - rc = 1; - goto crappy_code; - } - break; - } - } + rc = parsec_atomic_fetch_add_int32( &(gpu_device->mutex), (-1 * nb_migrated) ); + if(rc == 1) + goto crappy_code; } - assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { parsec_gpu_sort_pending_list(gpu_device); From 93c9d3936f847cf34274dccd4bfdcdd3265ad97a Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 2 Jun 2022 23:21:32 +1000 Subject: [PATCH 079/215] control flow changed --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 2 +- parsec/mca/device/cuda/device_cuda_module.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 390c583b2..d47375970 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -322,7 +322,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t * @return int */ -int migrate_to_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) +int migrate_to_starving_device(parsec_execution_stream_t *es, 
parsec_device_gpu_module_t* dealer_device) { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index eedaaf17f..c789100c7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -62,7 +62,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es); int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, parsec_gpu_task_t* migrated_gpu_task); -int migrate_to_starving(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); +int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3d5c1cf7e..3ce3dbcdb 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2771,11 +2771,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * is deducted from the total number of tasks that will be executed by this * GPU. 
*/ - nb_migrated = migrate_to_starving(es, gpu_device); + nb_migrated = migrate_to_starving_device(es, gpu_device); if( nb_migrated > 0 ) { - rc = parsec_atomic_fetch_add_int32( &(gpu_device->mutex), (-1 * nb_migrated) ); - if(rc == 1) + //rc = parsec_atomic_fetch_add_int32( &(gpu_device->mutex), (-1 * nb_migrated) ); + //if(rc == 1) goto crappy_code; } @@ -2833,8 +2833,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream,"GPU[%s]: gpu_task %p freed at %s:%d", gpu_device->super.name, gpu_task, __FILE__, __LINE__); free( gpu_task ); - rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); crappy_code: + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); if( 1 == rc ) { /* I was the last one */ #if defined(PARSEC_PROF_TRACE) if( parsec_gpu_trackable_events & PARSEC_PROFILE_GPU_TRACK_OWN ) From b135c800c59eb172d814b120e4c67b353475c459 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 3 Jun 2022 04:52:37 +1000 Subject: [PATCH 080/215] verbose corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d47375970..4544fb120 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -306,7 +306,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; parsec_device_gpu_module_t* starving_device = mig_task->starving_device; char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); 
(void)es; @@ -433,7 +433,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu parsec_cuda_mig_task_enqueue(es, mig_task); char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); } From 216e13367fbdc9fb1b164629ebc342fbf94b7170 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 7 Jun 2022 06:47:20 +1000 Subject: [PATCH 081/215] changes to parsec_data_copy_detach(): additional code added to make sure the copy always has an owner if it is not the last copy. This is required as in some instances (when migrating), without this, the owner of a device points to NULL. --- parsec/data.c | 69 ++++++++++++++++- parsec/mca/device/cuda/device_cuda_migrate.c | 78 +++++++++++++++----- parsec/mca/device/cuda/device_cuda_migrate.h | 5 +- parsec/mca/device/cuda/device_cuda_module.c | 48 ++++++++++-- 4 files changed, 172 insertions(+), 28 deletions(-) diff --git a/parsec/data.c b/parsec/data.c index 4ed0055a9..0b8b2d0f3 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -187,6 +187,48 @@ int parsec_data_copy_detach(parsec_data_t* data, parsec_data_copy_t* copy, uint8_t device) { + uint32_t i = 0; + int younger_version = -1; + parsec_data_copy_t* new_owner_copy = NULL; + + /** + * @brief make sure the copy always has an owner if it is not the + * last copy. This is required as in some instance (when migrating) + * without this the owner of a device points to a NULL. 
+ */ + if( data->owner_device == device) + { + for( i = 0; i < parsec_nb_devices; i++ ) + { + if( i == device) continue; + if( NULL == data->device_copies[i] ) continue; + if( data->device_copies[i]->version < copy->version) + { + younger_version = i; + continue; + } + + data->owner_device = data->device_copies[i]->device_index; + new_owner_copy = data->device_copies[i]; + } + + if( (new_owner_copy == NULL) && (younger_version == -1) ) + { + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: parsec_data_copy_detach failed to identify new owner (last copy): data %p device_copy %p", + device, data, copy); + data->owner_device = -1; + } + if( (new_owner_copy == NULL) && (device > 1) && (younger_version > -1) ) + { + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: parsec_data_copy_detach failed to identify new owner (younger version exists in device %d): data %p device_copy %p", + device, younger_version, data, copy); + assert(0); + } + + } + parsec_data_copy_t* obj = data->device_copies[device]; if( obj != copy ) { return PARSEC_ERR_NOT_FOUND; @@ -324,14 +366,15 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, int valid_copy = data->owner_device; parsec_data_copy_t* copy; + //assert(data->owner_device != device); assert(NULL != data); copy = data->device_copies[device]; assert( NULL != copy ); - + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "DEV[%d]: start transfer ownership of data %p to copy %p in mode %d", - device, data, copy, access_mode); + "DEV[%d]: data %p device_copy %p device_index %d selected for ownership transfer (original %p coherency %d)", + device, data, data->device_copies[valid_copy], valid_copy, data, PARSEC_DATA_COHERENCY_INVALID); switch( copy->coherency_state ) { case PARSEC_DATA_COHERENCY_INVALID: @@ -345,6 +388,9 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, valid_copy = i; } } + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: data %p device_copy %p device_index 
%d selected for ownership transfer (original %p coherency %d)", + device, data, data->device_copies[valid_copy], valid_copy, data, PARSEC_DATA_COHERENCY_INVALID); break; case PARSEC_DATA_COHERENCY_SHARED: @@ -354,6 +400,10 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, && data->device_copies[i]->version > copy->version ) { assert( (int)i == valid_copy ); transfer_required = 1; + + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: data %p device_copy %p device_index %d selected for ownership transfer (original %p coherency %d)", + device, data, data->device_copies[valid_copy], valid_copy, data, PARSEC_DATA_COHERENCY_SHARED); } #if defined(PARSEC_DEBUG_PARANOID) else { @@ -371,6 +421,10 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, for( i = 0; i < parsec_nb_devices; i++ ) { if( device == i || NULL == data->device_copies[i] ) continue; assert( PARSEC_DATA_COHERENCY_INVALID == data->device_copies[i]->coherency_state ); + + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: data %p device_copy %p device_index %d selected for ownership transfer (original %p coherency %d)", + device, data, data->device_copies[valid_copy], valid_copy, data, PARSEC_DATA_COHERENCY_EXCLUSIVE); } #endif /* defined(PARSEC_DEBUG_PARANOID) */ break; @@ -383,6 +437,9 @@ int parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, assert( PARSEC_DATA_COHERENCY_INVALID == data->device_copies[i]->coherency_state || PARSEC_DATA_COHERENCY_SHARED == data->device_copies[i]->coherency_state ); assert( copy->version >= data->device_copies[i]->version ); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: data %p device_copy %p device_index %d selected for ownership transfer (original %p coherency %d)", + device, data, data->device_copies[valid_copy], valid_copy, data, PARSEC_DATA_COHERENCY_OWNED); } #endif /* defined(PARSEC_DEBUG_PARANOID) */ break; @@ -414,6 +471,12 @@ int 
parsec_data_start_transfer_ownership_to_copy(parsec_data_t* data, } } + if(data->device_copies[valid_copy] == NULL) + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: data %p goes to NULL (%p) for device_copy %d", + device, data, valid_copy); + + assert( data->device_copies[valid_copy] != NULL ); assert( (!transfer_required) || (data->device_copies[valid_copy]->version >= copy->version) ); if( PARSEC_FLOW_ACCESS_READ & access_mode ) { diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 4544fb120..1653e069d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -277,7 +277,7 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) starving_device = mig_task->starving_device; stage_in_status = mig_task->stage_in_status; - change_task_features(migrated_gpu_task, dealer_device, stage_in_status); + change_task_features(migrated_gpu_task, dealer_device, starving_device, stage_in_status); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); parsec_atomic_fetch_inc_int32(&device_info[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); @@ -350,19 +350,19 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 execution_level = 1; - if( migrated_gpu_task == NULL) - { - for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) - { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 - if(migrated_gpu_task != NULL) - { - execution_level = 2; - stream_index = 2 + j; - break; - } - } - } + //if( migrated_gpu_task == NULL) + //{ + // for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) + // { + // migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( 
dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 + // if(migrated_gpu_task != NULL) + // { + // execution_level = 2; + // stream_index = 2 + j; + // break; + // } + // } + //} } @@ -455,11 +455,12 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu */ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, - int stage_in_status) + parsec_device_gpu_module_t* starving_device, int stage_in_status) { int i = 0; parsec_task_t *task = gpu_task->ec; parsec_data_copy_t *src_copy = NULL; + char tmp[128]; for(i = 0; i < task->task_class->nb_flows; i++) { @@ -517,7 +518,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_in->readers > 0); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } @@ -545,15 +546,31 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t */ else { - if(task->data[i].data_out->original->owner_device == dealer_device->super.device_index && + assert( task->data[i].data_in != NULL); + if( (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version) ) { parsec_data_t* original = task->data[i].data_out->original; + + assert(original->device_copies[0] != NULL); + assert(original->device_copies[original->owner_device] != NULL); + parsec_atomic_lock( &original->lock ); task->data[i].data_in = task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); PARSEC_OBJ_RETAIN(task->data[i].data_in); + + 
PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "Migrate data: data %p of original %p migrated from device %d to %d for task %s", + task->data[i].data_out, original, dealer_device->super.device_index, + starving_device->super.device_index, + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task))); + + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_atomic_unlock( &original->lock ); } } @@ -591,4 +608,31 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo return 0; } +int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data, int flow_index, + parsec_data_copy_t* src_copy, parsec_data_copy_t* dst_copy, + uint8_t stage_in_device, uint8_t access_mode) +{ + assert( dst_copy != NULL ); + parsec_task_t *task = gpu_task->ec; + + /** + * @brief we are doing a D2D copy from the dealer node to the starving node. 
+ */ + if( task->data[flow_index].data_in == src_copy && src_copy->device_index > 1) + { + if( PARSEC_FLOW_ACCESS_READ & access_mode ) + { + if( data->owner_device == src_copy->device_index) + data->owner_device = (uint8_t)stage_in_device; + } + + if( PARSEC_FLOW_ACCESS_WRITE & access_mode ) + { + data->owner_device = (uint8_t)stage_in_device; + //parsec_data_copy_detach(data, src_copy, src_copy->device_index); + } + + } +} + diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index c789100c7..f5610f1df 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -68,8 +68,11 @@ int parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* g int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, parsec_device_gpu_module_t* dest_dev); int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, - int stage_in_status); + parsec_device_gpu_module_t* starving_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); +int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data, int flow_index, + parsec_data_copy_t* src_copy, parsec_data_copy_t* dst_copy, + uint8_t stage_in_device, uint8_t access_mode); double current_time(); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3ce3dbcdb..f302bcca9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -771,6 +771,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de assert(0 != (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); #if defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p at %s:%d", + gpu_copy, 
gpu_copy->original, __FILE__, __LINE__); cudaFree( gpu_copy->device_private ); #else @@ -788,6 +790,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de gpu_device->super.device_index, NULL, 0); } #endif + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p at %s:%d", + gpu_copy, gpu_copy->original, __FILE__, __LINE__); zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); #endif gpu_copy->device_private = NULL; @@ -796,6 +800,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de * before we get here (aka below parsec_fini), the destructor of the data * collection must have been called, releasing all the copies. */ + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + gpu_copy, gpu_copy->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_copy); //assert(NULL == gpu_copy); } } @@ -816,7 +822,7 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - assert(0); + //assert(0); } #endif return PARSEC_SUCCESS; @@ -967,6 +973,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); } #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + gpu_elem, gpu_elem->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_elem); #endif parsec_atomic_unlock(&master->lock); @@ -1119,6 +1127,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, } #endif assert( 0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p at 
%s:%d", + lru_gpu_elem, lru_gpu_elem->original, __FILE__, __LINE__); zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); lru_gpu_elem->device_private = NULL; data_avail_epoch++; @@ -1126,6 +1136,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:%s: Release LRU-retrieved CUDA copy %p [ref_count %d: must be 1]", gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->super.super.obj_reference_count); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + lru_gpu_elem, lru_gpu_elem->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(lru_gpu_elem); assert( NULL == lru_gpu_elem ); goto malloc_data; @@ -1156,6 +1168,11 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count, __FILE__, __LINE__); parsec_data_copy_attach(master, gpu_elem, gpu_device->super.device_index); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "Allocated copy %p in original %p with device_index %d %s:%d", + gpu_elem, master, gpu_device->super.device_index, + __FILE__, __LINE__); + this_task->data[i].data_out = gpu_elem; /* set the new datacopy type to the correct one */ this_task->data[i].data_out->dtt = this_task->data[i].data_in->dtt; @@ -1424,6 +1441,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, */ if(gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); + } else { /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; @@ -1513,7 +1531,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) + { + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + release_after_data_in_is_attached, 
release_after_data_in_is_attached->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + } assert(0); return -1; } @@ -1540,9 +1562,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_elem->push_task = gpu_task->ec; /* only the task who does the transfer can modify the data status later. */ parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) + { + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + } return 1; } + if( undo_readers_inc_if_no_transfer ) { //parsec_atomic_fetch_dec_int32( &in_elem->readers ); @@ -1552,6 +1579,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); + //if( gpu_task->migrate_status > TASK_NOT_MIGRATED) + // gurantee_ownership_transfer(gpu_task, original, flow->flow_index, + // in_elem, gpu_elem, + // gpu_device->super.device_index, type); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\t\tNO Move %s for data copy %p [ref_count %d, key %x] of %d bytes (host v:%d / device v:%d)", gpu_device->super.name, @@ -1560,7 +1592,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, in_elem->version, gpu_elem->version); parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) + { + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + } /* TODO: data keeps the same coherence flags as before */ return 0; } @@ -2340,6 +2376,7 @@ parsec_cuda_kernel_pop( 
parsec_device_gpu_module_t *gpu_device, "GPU[%s]:\tread copy %p [ref_count %d] on flow %s has readers (%i)", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name, gpu_copy->readers); } + if( flow->flow_flags & PARSEC_FLOW_ACCESS_WRITE ) { assert( gpu_copy == parsec_data_get_copy(gpu_copy->original, gpu_device->super.device_index) ); @@ -2399,6 +2436,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, } else { assert( 0 == gpu_copy->readers ); } + original->device_copies[0]->version = gpu_copy->version; } parsec_atomic_unlock(&original->lock); } @@ -2772,13 +2810,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * GPU. */ nb_migrated = migrate_to_starving_device(es, gpu_device); - if( nb_migrated > 0 ) - { - //rc = parsec_atomic_fetch_add_int32( &(gpu_device->mutex), (-1 * nb_migrated) ); - //if(rc == 1) + if( nb_migrated > 0 ) goto crappy_code; - } - + assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { parsec_gpu_sort_pending_list(gpu_device); From 3cc25b18ca5d2e8fdd6ad0802cd7eb61ff8b62eb Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 9 Jun 2022 00:22:04 +1000 Subject: [PATCH 082/215] data_retained added to parsec_gpu_task_s to keep track of data operated upon by the migrating task, in the dealer device. 
--- parsec/data.c | 16 +- parsec/executed_tasks | 409 ------------------- parsec/interfaces/ptg/ptg-compiler/jdf2c.c | 1 + parsec/mca/device/cuda/device_cuda_migrate.c | 41 +- parsec/mca/device/cuda/device_cuda_module.c | 36 +- parsec/mca/device/device_gpu.h | 3 +- 6 files changed, 60 insertions(+), 446 deletions(-) delete mode 100644 parsec/executed_tasks diff --git a/parsec/data.c b/parsec/data.c index 0b8b2d0f3..472846092 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -198,6 +198,8 @@ int parsec_data_copy_detach(parsec_data_t* data, */ if( data->owner_device == device) { + parsec_atomic_lock( &data->lock ); + for( i = 0; i < parsec_nb_devices; i++ ) { if( i == device) continue; @@ -212,20 +214,26 @@ int parsec_data_copy_detach(parsec_data_t* data, new_owner_copy = data->device_copies[i]; } + parsec_atomic_unlock( &data->lock ); + if( (new_owner_copy == NULL) && (younger_version == -1) ) { PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "DEV[%d]: parsec_data_copy_detach failed to identify new owner (last copy): data %p device_copy %p", + "DEV[%d]: parsec_data_copy_detach failed to identify new owner (last copy): original %p device_copy %p", device, data, copy); data->owner_device = -1; } if( (new_owner_copy == NULL) && (device > 1) && (younger_version > -1) ) { PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "DEV[%d]: parsec_data_copy_detach failed to identify new owner (younger version exists in device %d): data %p device_copy %p", + "DEV[%d]: parsec_data_copy_detach failed to identify new owner (younger version exists in device %d): original %p device_copy %p", device, younger_version, data, copy); assert(0); } + if( new_owner_copy != NULL ) + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "DEV[%d]: parsec_data_copy_detach identified new owner original %p device_copy %p", + device, data, copy); } @@ -346,8 +354,8 @@ void parsec_data_end_transfer_ownership_to_copy(parsec_data_t* data, assert(NULL != data); copy = data->device_copies[device]; 
PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "DEV[%d]: end transfer ownership of data %p to copy %p in mode %d", - device, data, copy, access_mode); + "DEV[%d]: end transfer ownership of data %p to copy %p [dev_prvt %p] in mode %d", + device, data, copy, copy->device_private, access_mode); assert( NULL != copy ); if( PARSEC_FLOW_ACCESS_READ & access_mode ) { copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; diff --git a/parsec/executed_tasks b/parsec/executed_tasks deleted file mode 100644 index 56c12f323..000000000 --- a/parsec/executed_tasks +++ /dev/null @@ -1,409 +0,0 @@ -interfaces/dtd/insert_function.c:333: if( !(tp->super.devices_index_mask & (1 << device->device_index))) -interfaces/dtd/insert_function.c:335: tp->super.devices_index_mask &= ~(1 << device->device_index); -interfaces/dtd/insert_function.c:336: if((NULL == device) || (NULL == device->taskpool_unregister)) -interfaces/dtd/insert_function.c:338: (void)device->taskpool_unregister(device, &tp->super); -interfaces/dtd/insert_function.c:1258: __tp->super.devices_index_mask |= (1 << device->device_index); -interfaces/dtd/insert_function.c:1356: if( !(tp->devices_index_mask & (1 << device->device_index))) continue; /* not supported */ -interfaces/dtd/insert_function.c:1360: if( PARSEC_DEV_CUDA == device->type ) continue; -interfaces/dtd/insert_function.c:1362: if( NULL != device->taskpool_register ) -interfaces/dtd/insert_function.c:1364: device->taskpool_register(device, (parsec_taskpool_t *)tp)) { -interfaces/dtd/insert_function.c:1365: tp->devices_index_mask &= ~(1 << device->device_index); /* can't use this type */ -interfaces/ptg/ptg-compiler/jdf2c.c:4372: " if(NULL != device->taskpool_register)\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4373: " if( PARSEC_SUCCESS != device->taskpool_register(device, (parsec_taskpool_t*)__parsec_tp) ) {\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4374: " parsec_debug_verbose(5, parsec_debug_output, \"Device %%s refused to register taskpool %%p\", device->name, 
__parsec_tp);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4375: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4378: " if(NULL != device->memory_register) { /* Register all the data */\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4381: " supported_dev |= device->type;\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4391: " device->name, parsec_dc->key_base, parsec_dc, __parsec_tp);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4392: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4398: " device->name, parsec_dc->key_base, parsec_dc, __parsec_tp);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4399: " __parsec_tp->super.super.devices_index_mask &= ~(1 << device->device_index);\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4523: " if((NULL == (device = parsec_mca_device_get(_i))) || (NULL == device->memory_unregister)) continue;\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4537: " if((NULL == device) || (NULL == device->taskpool_unregister)) continue;\n" -interfaces/ptg/ptg-compiler/jdf2c.c:4538: " if( PARSEC_SUCCESS != device->taskpool_unregister(device, &__parsec_tp->super.super) ) continue;\n" -interfaces/ptg/ptg-compiler/jdf2c.c:6567: " struct parsec_body_cuda_%s_%s_s parsec_body = { cuda_device->cuda_index, cuda_stream->cuda_stream, NULL };\n" -interfaces/ptg/ptg-compiler/jdf2c.c:6619: " PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, \"GPU[%%s]:\\tEnqueue on device %%s priority %%d\", gpu_device->super.name, \n" -data_dist/matrix/two_dim_rectangle_cyclic.c:45: return device->memory_register(device, desc, -data_dist/matrix/two_dim_rectangle_cyclic.c:57: return device->memory_unregister(device, desc, twodbc->mat); -data_dist/matrix/sym_two_dim_rectangle_cyclic.c:48: return device->memory_register(device, desc, -data_dist/matrix/sym_two_dim_rectangle_cyclic.c:60: return device->memory_unregister(device, desc, sym_twodbc->mat); -data_internal.h:75: void 
*device_private; /**< The pointer to the device-specific data. -mca/device/cuda/device_cuda_migrate.c:258: dealer_device_index = dealer_device->super.device_index; -mca/device/template/device_template_module.c:75: assert(tp->devices_index_mask & (1 << device->device_index)); -mca/device/template/device_template_module.c:81: if( chores[j].type != device->type ) -mca/device/template/device_template_module.c:100: tp->devices_index_mask &= ~(1 << device->device_index); /* drop support for this device */ -mca/device/template/device_template_module.c:102: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); -mca/device/template/device_template_module.c:155: device->super.name = strdup("0"); -mca/device/template/device_template_module.c:157: device->super.type = PARSEC_DEV_TEMPLATE; -mca/device/template/device_template_module.c:158: device->super.executed_tasks = 0; -mca/device/template/device_template_module.c:159: device->super.transferred_data_in = 0; -mca/device/template/device_template_module.c:160: device->super.transferred_data_out = 0; -mca/device/template/device_template_module.c:161: device->super.required_data_in = 0; -mca/device/template/device_template_module.c:162: device->super.required_data_out = 0; -mca/device/template/device_template_module.c:164: device->super.attach = (parsec_device_attach_f)parsec_device_template_attach; -mca/device/template/device_template_module.c:165: device->super.detach = (parsec_device_detach_f)parsec_device_template_detach; -mca/device/template/device_template_module.c:166: device->super.memory_register = parsec_template_memory_register; -mca/device/template/device_template_module.c:167: device->super.memory_unregister = parsec_template_memory_unregister; -mca/device/template/device_template_module.c:168: device->super.taskpool_register = parsec_template_taskpool_register; -mca/device/template/device_template_module.c:169: device->super.taskpool_unregister = parsec_template_taskpool_unregister; 
-mca/device/template/device_template_module.c:171: device->super.device_hweight = 0; /* no computational capacity */ -mca/device/template/device_template_module.c:172: device->super.device_tweight = 0; -mca/device/template/device_template_module.c:173: device->super.device_sweight = 0; -mca/device/template/device_template_module.c:174: device->super.device_dweight = 0; -mca/device/template/device_template_module.c:177: parsec_inform("TEMPLATE Device %d enabled\n", device->super.device_index); -mca/device/device_gpu.c:68: if( NULL != PARSEC_DATA_GET_COPY(original, gpu_device->super.device_index) ) { -mca/device/device_gpu.c:121: parsec_list_t *sort_list = gpu_device->exec_stream[0]->fifo_pending; -mca/device/device_gpu.c:127: if (gpu_device->sort_starting_p == NULL || !parsec_list_nolock_contains(sort_list, gpu_device->sort_starting_p) ) { -mca/device/device_gpu.c:128: gpu_device->sort_starting_p = (parsec_list_item_t*)sort_list->ghost_element.list_next; -mca/device/device_gpu.c:132: parsec_list_item_t *p = gpu_device->sort_starting_p; -mca/device/device_gpu.c:183: gpu_stream->workspace->workspace[i] = zone_malloc( gpu_device->memory, size); -mca/device/device_gpu.c:186: gpu_device->super.name, -mca/device/device_gpu.c:190: (gpu_device->exec_stream[0]->prof_event_track_enable || -mca/device/device_gpu.c:191: gpu_device->exec_stream[1]->prof_event_track_enable)) { -mca/device/device_gpu.c:194: gpu_stream->workspace->workspace[i], gpu_device->super.device_index, -mca/device/device_gpu.c:222: for( i = 0; i < gpu_device->max_exec_streams; i++ ) { -mca/device/device_gpu.c:223: parsec_gpu_exec_stream_t *gpu_stream = gpu_device->exec_stream[i]; -mca/device/device_gpu.c:228: (gpu_device->exec_stream[0]->prof_event_track_enable || -mca/device/device_gpu.c:229: gpu_device->exec_stream[1]->prof_event_track_enable)) { -mca/device/device_gpu.c:232: gpu_stream->workspace->workspace[i], gpu_device->super.device_index, -mca/device/device_gpu.c:238: gpu_device->super.name, 
-mca/device/device_gpu.c:240: zone_free( gpu_device->memory, gpu_stream->workspace->workspace[j] ); -mca/device/device_gpu.c:311: parsec_output(parsec_gpu_output_stream, "Device %d:%d (%p) epoch\n", gpu_device->super.device_index, -mca/device/device_gpu.c:312: gpu_device->super.device_index, gpu_device, gpu_device->data_avail_epoch); -mca/device/device_gpu.c:314: gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->max_exec_streams); -mca/device/device_gpu.c:316: (unsigned long long)gpu_device->super.transferred_data_in, (unsigned long long)gpu_device->super.d2d_transfer, -mca/device/device_gpu.c:317: (unsigned long long)gpu_device->super.transferred_data_out, -mca/device/device_gpu.c:318: (unsigned long long)gpu_device->super.required_data_in, (unsigned long long)gpu_device->super.required_data_out); -mca/device/device_gpu.c:319: for( i = 0; i < gpu_device->max_exec_streams; i++ ) { -mca/device/device_gpu.c:320: dump_exec_stream(gpu_device->exec_stream[i]); -mca/device/device_gpu.c:322: if( !parsec_list_is_empty(&gpu_device->gpu_mem_lru) ) { -mca/device/device_gpu.c:325: PARSEC_LIST_ITERATOR(&gpu_device->gpu_mem_lru, item, -mca/device/device_gpu.c:334: if( !parsec_list_is_empty(&gpu_device->gpu_mem_owned_lru) ) { -mca/device/device_gpu.c:337: PARSEC_LIST_ITERATOR(&gpu_device->gpu_mem_owned_lru, item, -mca/device/device.c:312: assert( i == device->device_index ); -mca/device/device.c:314: device_counter[device->device_index] += device->executed_tasks; -mca/device/device.c:315: transferred_in[device->device_index] += device->transferred_data_in; -mca/device/device.c:316: transferred_out[device->device_index] += device->transferred_data_out; -mca/device/device.c:317: required_in[device->device_index] += device->required_data_in; -mca/device/device.c:318: required_out[device->device_index] += device->required_data_out; -mca/device/device.c:320: total += device->executed_tasks; -mca/device/device.c:321: total_data_in += 
device->transferred_data_in; -mca/device/device.c:322: total_data_out += device->transferred_data_out; -mca/device/device.c:323: total_required_in += device->required_data_in; -mca/device/device.c:324: total_required_out += device->required_data_out; -mca/device/device.c:326: device->executed_tasks = 0; -mca/device/device.c:327: device->transferred_data_in = 0; -mca/device/device.c:328: device->transferred_data_out = 0; -mca/device/device.c:329: device->required_data_in = 0; -mca/device/device.c:330: device->required_data_out = 0; -mca/device/device.c:352: device->device_index, device_counter[i], (device_counter[i]/gtotal)*100.00, -mca/device/device.c:356: (((double)transferred_out[i]) / (double)required_out[i]) * 100.0, device->name ); -mca/device/device.c:557: parsec_device_hweight[i] = device->device_hweight; -mca/device/device.c:558: parsec_device_sweight[i] = device->device_sweight; -mca/device/device.c:559: parsec_device_dweight[i] = device->device_dweight; -mca/device/device.c:560: parsec_device_tweight[i] = device->device_tweight; -mca/device/device.c:561: if( PARSEC_DEV_RECURSIVE == device->type ) continue; -mca/device/device.c:562: total_hperf += device->device_hweight; -mca/device/device.c:563: total_tperf += device->device_tweight; -mca/device/device.c:564: total_sperf += device->device_sweight; -mca/device/device.c:565: total_dperf += device->device_dweight; -mca/device/device.c:710: device->device_hweight = nstreams * fp_ipc * freq; /* No processor have half precision for now */ -mca/device/device.c:711: device->device_tweight = nstreams * fp_ipc * freq; /* No processor support tensor operations for now */ -mca/device/device.c:712: device->device_sweight = nstreams * fp_ipc * freq; -mca/device/device.c:713: device->device_dweight = nstreams * dp_ipc * freq; -mca/device/device.c:728: assert(tp->devices_index_mask & (1 << device->device_index)); -mca/device/device.c:736: if( chores[j].type != device->type ) -mca/device/device.c:754: 
tp->devices_index_mask &= ~(1 << device->device_index); /* discard this type */ -mca/device/device.c:756: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); -mca/device/device.c:820: if( NULL != device->context ) { -mca/device/device.c:833: device->device_index = parsec_nb_devices; -mca/device/device.c:835: device->context = context; -mca/device/device.c:837: PARSEC_OBJ_CONSTRUCT(&device->infos, parsec_info_object_array_t); -mca/device/device.c:838: parsec_info_object_array_init(&device->infos, &parsec_per_device_infos, device); -mca/device/device.c:839: return device->device_index; -mca/device/device.c:853: PARSEC_OBJ_DESTRUCT(&device->infos); -mca/device/device.c:855: if( NULL == device->context ) { -mca/device/device.c:859: if(device != parsec_devices[device->device_index]) { -mca/device/device.c:863: parsec_devices[device->device_index] = NULL; -mca/device/device.c:864: device->context = NULL; -mca/device/device.c:865: device->device_index = -1; -mca/device/device.c:880: if ((NULL == device) || (device->type & device_type)) -mca/device/device.c:886: tp->devices_index_mask &= ~(1 << device->device_index); -mca/device/transfer_gpu.c:222: parsec_list_item_t* item = (parsec_list_item_t*)gpu_device->gpu_mem_owned_lru.ghost_element.list_next; -mca/device/transfer_gpu.c:229: if( item == &(gpu_device->gpu_mem_owned_lru.ghost_element) ) { -mca/device/transfer_gpu.c:251: gpu_device->super.name, (void*)d2h_task, -mca/device/transfer_gpu.c:272: w2r_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; -mca/device/transfer_gpu.c:293: gpu_device->super.name, (void*)task, task->locals[0].value); -mca/device/transfer_gpu.c:300: gpu_device->super.transferred_data_out += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ -mca/device/transfer_gpu.c:311: gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); -mca/device/transfer_gpu.c:318: gpu_device->super.name, -mca/device/transfer_gpu.c:323: 
gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); -mca/device/transfer_gpu.c:324: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); -mca/device/transfer_gpu.c:330: gpu_device->data_avail_epoch++; -mca/device/cuda/device_cuda_module.c:179: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:180: int index, capability = cuda_device->major * 10 + cuda_device->minor; -mca/device/cuda/device_cuda_module.c:185: status = cudaSetDevice( cuda_device->cuda_index ); -mca/device/cuda/device_cuda_module.c:223: function_name, gpu_device->super.name); -mca/device/cuda/device_cuda_module.c:260: assert(PARSEC_DEV_CUDA == device->type); -mca/device/cuda/device_cuda_module.c:261: assert(tp->devices_index_mask & (1 << device->device_index)); -mca/device/cuda/device_cuda_module.c:267: if( chores[j].type != device->type ) -mca/device/cuda/device_cuda_module.c:286: tp->devices_index_mask &= ~(1 << device->device_index); /* drop support for this device */ -mca/device/cuda/device_cuda_module.c:288: "Device %d (%s) disabled for taskpool %p", device->device_index, device->name, tp); -mca/device/cuda/device_cuda_module.c:358: gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:359: device = &gpu_device->super; -mca/device/cuda/device_cuda_module.c:361: cuda_device->cuda_index = (uint8_t)dev_id; -mca/device/cuda/device_cuda_module.c:362: cuda_device->major = (uint8_t)major; -mca/device/cuda/device_cuda_module.c:363: cuda_device->minor = (uint8_t)minor; -mca/device/cuda/device_cuda_module.c:364: len = asprintf(&gpu_device->super.name, "%s (%d)", szName, dev_id); -mca/device/cuda/device_cuda_module.c:366: gpu_device->super.name = ""; -mca/device/cuda/device_cuda_module.c:367: gpu_device->data_avail_epoch = 0; -mca/device/cuda/device_cuda_module.c:369: gpu_device->max_exec_streams = PARSEC_MAX_STREAMS; -mca/device/cuda/device_cuda_module.c:370: gpu_device->exec_stream = 
-mca/device/cuda/device_cuda_module.c:371: (parsec_gpu_exec_stream_t**)malloc(gpu_device->max_exec_streams * sizeof(parsec_gpu_exec_stream_t*)); -mca/device/cuda/device_cuda_module.c:375: gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)malloc(gpu_device->max_exec_streams * sizeof -mca/device/cuda/device_cuda_module.c:377: for( j = 1; j < gpu_device->max_exec_streams; j++ ) { -mca/device/cuda/device_cuda_module.c:378: gpu_device->exec_stream[j] = (parsec_gpu_exec_stream_t*)( -mca/device/cuda/device_cuda_module.c:379: (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[0] + j); -mca/device/cuda/device_cuda_module.c:381: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { -mca/device/cuda/device_cuda_module.c:382: parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; -mca/device/cuda/device_cuda_module.c:433: exec_stream->profiling = gpu_device->exec_stream[0]->profiling; -mca/device/cuda/device_cuda_module.c:444: device->type = PARSEC_DEV_CUDA; -mca/device/cuda/device_cuda_module.c:445: device->executed_tasks = 0; -mca/device/cuda/device_cuda_module.c:446: device->transferred_data_in = 0; -mca/device/cuda/device_cuda_module.c:447: device->d2d_transfer = 0; -mca/device/cuda/device_cuda_module.c:448: device->transferred_data_out = 0; -mca/device/cuda/device_cuda_module.c:449: device->required_data_in = 0; -mca/device/cuda/device_cuda_module.c:450: device->required_data_out = 0; -mca/device/cuda/device_cuda_module.c:452: device->attach = parsec_device_cuda_attach; -mca/device/cuda/device_cuda_module.c:453: device->detach = parsec_device_cuda_detach; -mca/device/cuda/device_cuda_module.c:454: device->memory_register = parsec_cuda_memory_register; -mca/device/cuda/device_cuda_module.c:455: device->memory_unregister = parsec_cuda_memory_unregister; -mca/device/cuda/device_cuda_module.c:456: device->taskpool_register = parsec_cuda_taskpool_register; -mca/device/cuda/device_cuda_module.c:457: device->taskpool_unregister = 
parsec_cuda_taskpool_unregister; -mca/device/cuda/device_cuda_module.c:458: device->data_advise = parsec_cuda_data_advise; -mca/device/cuda/device_cuda_module.c:459: device->memory_release = parsec_cuda_flush_lru; -mca/device/cuda/device_cuda_module.c:464: "the PaRSEC runtime developers", gpu_device->super.name, major, minor ); -mca/device/cuda/device_cuda_module.c:466: device->device_hweight = (float)streaming_multiprocessor * (float)hrate * (float)clockRate * 2e-3f; -mca/device/cuda/device_cuda_module.c:467: device->device_tweight = (float)streaming_multiprocessor * (float)trate * (float)clockRate * 2e-3f; -mca/device/cuda/device_cuda_module.c:468: device->device_sweight = (float)streaming_multiprocessor * (float)srate * (float)clockRate * 2e-3f; -mca/device/cuda/device_cuda_module.c:469: device->device_dweight = (float)streaming_multiprocessor * (float)drate * (float)clockRate * 2e-3f; -mca/device/cuda/device_cuda_module.c:472: PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); -mca/device/cuda/device_cuda_module.c:473: PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); -mca/device/cuda/device_cuda_module.c:474: PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); -mca/device/cuda/device_cuda_module.c:476: gpu_device->sort_starting_p = NULL; -mca/device/cuda/device_cuda_module.c:477: gpu_device->peer_access_mask = 0; /* No GPU to GPU direct transfer by default */ -mca/device/cuda/device_cuda_module.c:495: cuda_device->cuda_index, cuda_device->major, cuda_device->minor, device->name, -mca/device/cuda/device_cuda_module.c:502: device->device_dweight, device->device_sweight, device->device_tweight, device->device_hweight); -mca/device/cuda/device_cuda_module.c:509: if( NULL != gpu_device->exec_stream) { -mca/device/cuda/device_cuda_module.c:510: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { -mca/device/cuda/device_cuda_module.c:511: parsec_cuda_exec_stream_t *cuda_stream = 
(parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; -mca/device/cuda/device_cuda_module.c:541: free(gpu_device->exec_stream[0]); -mca/device/cuda/device_cuda_module.c:542: free(gpu_device->exec_stream); -mca/device/cuda/device_cuda_module.c:543: gpu_device->exec_stream = NULL; -mca/device/cuda/device_cuda_module.c:557: status = cudaSetDevice( cuda_device->cuda_index ); -mca/device/cuda/device_cuda_module.c:565: PARSEC_OBJ_DESTRUCT(&gpu_device->pending); -mca/device/cuda/device_cuda_module.c:568: for( j = 0; j < gpu_device->max_exec_streams; j++ ) { -mca/device/cuda/device_cuda_module.c:569: parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; -mca/device/cuda/device_cuda_module.c:594: free(gpu_device->exec_stream[0]); -mca/device/cuda/device_cuda_module.c:595: free(gpu_device->exec_stream); -mca/device/cuda/device_cuda_module.c:596: gpu_device->exec_stream = NULL; -mca/device/cuda/device_cuda_module.c:598: cuda_device->cuda_index = -1; -mca/device/cuda/device_cuda_module.c:601: PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); -mca/device/cuda/device_cuda_module.c:602: PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); -mca/device/cuda/device_cuda_module.c:619: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:626: status = cudaSetDevice( cuda_device->cuda_index ); -mca/device/cuda/device_cuda_module.c:635: gpu_device->super.name, gpu_device->super.name); -mca/device/cuda/device_cuda_module.c:650: cuda_device->cuda_index, how_much_we_allocate, gpu_device->super.name, initial_free_mem); -mca/device/cuda/device_cuda_module.c:658: cuda_device->cuda_index, gpu_device->super.name); -mca/device/cuda/device_cuda_module.c:679: gpu_device->super.name,_free_mem, _total_mem, mem_elem_per_gpu); -mca/device/cuda/device_cuda_module.c:685: gpu_device->super.name,gpu_elem, gpu_elem->super.obj_reference_count, NULL); -mca/device/cuda/device_cuda_module.c:688: gpu_elem->device_index = 
gpu_device->super.device_index; -mca/device/cuda/device_cuda_module.c:693: gpu_device->super.name, gpu_elem, gpu_elem->super.obj_reference_count); -mca/device/cuda/device_cuda_module.c:694: parsec_list_push_back( &gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem ); -mca/device/cuda/device_cuda_module.c:697: if( 0 == mem_elem_per_gpu && parsec_list_is_empty( &gpu_device->gpu_mem_lru ) ) { -mca/device/cuda/device_cuda_module.c:698: parsec_warning("GPU[%s] Cannot allocate memory on GPU %s. Skip it!", gpu_device->super.name, gpu_device->super.name); -mca/device/cuda/device_cuda_module.c:703: gpu_device->super.name, mem_elem_per_gpu ); -mca/device/cuda/device_cuda_module.c:706: "GPU[%s] Allocate %u tiles on the GPU memory", gpu_device->super.name, mem_elem_per_gpu); -mca/device/cuda/device_cuda_module.c:708: if( NULL == gpu_device->memory ) { -mca/device/cuda/device_cuda_module.c:725: gpu_device->super.name, total_size); }) ); -mca/device/cuda/device_cuda_module.c:727: gpu_device->memory = zone_malloc_init( base_ptr, mem_elem_per_gpu, eltsize ); -mca/device/cuda/device_cuda_module.c:729: if( gpu_device->memory == NULL ) { -mca/device/cuda/device_cuda_module.c:731: gpu_device->super.name, gpu_device->super.name); -mca/device/cuda/device_cuda_module.c:736: gpu_device->super.name, mem_elem_per_gpu, eltsize ); -mca/device/cuda/device_cuda_module.c:739: gpu_device->mem_block_size = eltsize; -mca/device/cuda/device_cuda_module.c:740: gpu_device->mem_nb_blocks = mem_elem_per_gpu; -mca/device/cuda/device_cuda_module.c:750: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:759: gpu_device->super.name, gpu_copy, gpu_copy->device_private, gpu_copy->super.super -mca/device/cuda/device_cuda_module.c:762: assert( gpu_copy->device_index == cuda_device->super.super.device_index ); -mca/device/cuda/device_cuda_module.c:766: gpu_device->super.name, original->key); -mca/device/cuda/device_cuda_module.c:776: 
(gpu_device->exec_stream[0]->prof_event_track_enable || -mca/device/cuda/device_cuda_module.c:777: gpu_device->exec_stream[1]->prof_event_track_enable)) { -mca/device/cuda/device_cuda_module.c:778: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, -mca/device/cuda/device_cuda_module.c:780: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:782: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, -mca/device/cuda/device_cuda_module.c:785: gpu_device->super.device_index, NULL, 0); -mca/device/cuda/device_cuda_module.c:788: zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); -mca/device/cuda/device_cuda_module.c:810: parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); -mca/device/cuda/device_cuda_module.c:811: parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); -mca/device/cuda/device_cuda_module.c:813: if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { -mca/device/cuda/device_cuda_module.c:815: device->name, in_use); -mca/device/cuda/device_cuda_module.c:835: status = cudaSetDevice( cuda_device->cuda_index ); -mca/device/cuda/device_cuda_module.c:839: parsec_cuda_flush_lru(&cuda_device->super.super); -mca/device/cuda/device_cuda_module.c:842: assert( NULL != cuda_device->super.memory ); -mca/device/cuda/device_cuda_module.c:843: void* ptr = zone_malloc_fini(&cuda_device->super.memory); -mca/device/cuda/device_cuda_module.c:873: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:894: gpu_device->super.name, task_name, flow->name, i); -mca/device/cuda/device_cuda_module.c:901: gpu_elem = PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index); -mca/device/cuda/device_cuda_module.c:908: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:914: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:928: gpu_device->super.name, task_name, 
-mca/device/cuda/device_cuda_module.c:933: gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[i]); -mca/device/cuda/device_cuda_module.c:939: lru_gpu_elem = (parsec_gpu_data_copy_t*)parsec_list_pop_front(&gpu_device->gpu_mem_lru); -mca/device/cuda/device_cuda_module.c:947: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:957: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:960: parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); -mca/device/cuda/device_cuda_module.c:972: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:983: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:1001: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:1004: parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); -mca/device/cuda/device_cuda_module.c:1012: gpu_device->super.name, task_name); -mca/device/cuda/device_cuda_module.c:1031: parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); -mca/device/cuda/device_cuda_module.c:1039: gpu_device->super.name, task_name); -mca/device/cuda/device_cuda_module.c:1052: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:1076: parsec_data_copy_detach(oldmaster, lru_gpu_elem, gpu_device->super.device_index); -mca/device/cuda/device_cuda_module.c:1084: gpu_device->super.name, task_name, this_task->task_class->name, i, lru_gpu_elem, -mca/device/cuda/device_cuda_module.c:1090: gpu_device->super.name, task_name, this_task->task_class->name, i, lru_gpu_elem); -mca/device/cuda/device_cuda_module.c:1097: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1102: (gpu_device->exec_stream[0]->prof_event_track_enable || -mca/device/cuda/device_cuda_module.c:1103: gpu_device->exec_stream[1]->prof_event_track_enable)) { -mca/device/cuda/device_cuda_module.c:1104: 
parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, -mca/device/cuda/device_cuda_module.c:1106: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:1108: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, -mca/device/cuda/device_cuda_module.c:1111: gpu_device->super.device_index, NULL, 0); -mca/device/cuda/device_cuda_module.c:1115: zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); -mca/device/cuda/device_cuda_module.c:1120: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:1128: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1132: (gpu_device->exec_stream[0]->prof_event_track_enable || -mca/device/cuda/device_cuda_module.c:1133: gpu_device->exec_stream[1]->prof_event_track_enable)) { -mca/device/cuda/device_cuda_module.c:1134: parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, -mca/device/cuda/device_cuda_module.c:1136: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:1148: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1151: parsec_data_copy_attach(master, gpu_elem, gpu_device->super.device_index); -mca/device/cuda/device_cuda_module.c:1158: gpu_device->super.name, task_name, -mca/device/cuda/device_cuda_module.c:1161: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); -mca/device/cuda/device_cuda_module.c:1165: gpu_device->data_avail_epoch++; -mca/device/cuda/device_cuda_module.c:1267: parsec_device_gpu_module_t *gpu_device = &cuda_device->super; -mca/device/cuda/device_cuda_module.c:1280: gpu_device->super.name, gpu_task); -mca/device/cuda/device_cuda_module.c:1295: gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); -mca/device/cuda/device_cuda_module.c:1302: gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count); -mca/device/cuda/device_cuda_module.c:1314: if( gpu_device->peer_access_mask & (1 << 
in_elem_dev->cuda_index) ) { -mca/device/cuda/device_cuda_module.c:1328: if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { -mca/device/cuda/device_cuda_module.c:1333: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, in_elem, original); -mca/device/cuda/device_cuda_module.c:1341: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index); -mca/device/cuda/device_cuda_module.c:1348: gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); -mca/device/cuda/device_cuda_module.c:1368: gpu_device->super.name, in_elem, in_elem->super.super.obj_reference_count, original, gpu_elem, gpu_elem->super.super.obj_reference_count); -mca/device/cuda/device_cuda_module.c:1381: transfer_from = parsec_data_start_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); -mca/device/cuda/device_cuda_module.c:1387: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1401: gpu_device->super.required_data_in += original->nb_elts; -mca/device/cuda/device_cuda_module.c:1408: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1413: gpu_device->super.device_index, gpu_elem->version, (void*) -mca/device/cuda/device_cuda_module.c:1418: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1422: gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); -mca/device/cuda/device_cuda_module.c:1443: gpu_task->prof_tp_id = cuda_device->cuda_index; -mca/device/cuda/device_cuda_module.c:1454: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:1465: gpu_device->super.device_index, &_info, -mca/device/cuda/device_cuda_module.c:1476: in_elem->device_private, gpu_elem->device_private, cuda_device->cuda_index, -mca/device/cuda/device_cuda_module.c:1481: gpu_elem->device_private, cuda_device->cuda_index, 
-mca/device/cuda/device_cuda_module.c:1492: gpu_device->super.transferred_data_in += nb_elts; -mca/device/cuda/device_cuda_module.c:1494: gpu_device->super.d2d_transfer += nb_elts; -mca/device/cuda/device_cuda_module.c:1496: gpu_device->super.nb_data_faults += nb_elts; -mca/device/cuda/device_cuda_module.c:1503: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1519: parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); -mca/device/cuda/device_cuda_module.c:1523: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1583: gpu_device->super.name, parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, gpu_task), -mca/device/cuda/device_cuda_module.c:1588: gpu_device->super.name, gpu_task, __FILE__, __LINE__); -mca/device/cuda/device_cuda_module.c:1627: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:1667: gpu_device->super.name, gpu_task->ec->data[0].data_in, gpu_task->ec->data[0].data_in->super.super.obj_reference_count, -mca/device/cuda/device_cuda_module.c:1669: parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); -mca/device/cuda/device_cuda_module.c:1788: assert(gpu_stream == gpu_device->exec_stream[0]); -mca/device/cuda/device_cuda_module.c:1792: gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task)); -mca/device/cuda/device_cuda_module.c:1809: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:1816: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:1824: if( PARSEC_DEV_CUDA == src_device->super.type ) { -mca/device/cuda/device_cuda_module.c:1834: om = src_device->mutex; -mca/device/cuda/device_cuda_module.c:1837: if( parsec_atomic_cas_int32(&src_device->mutex, 0, -1) ) -mca/device/cuda/device_cuda_module.c:1852: if( parsec_atomic_cas_int32(&src_device->mutex, om, om+1) ) -mca/device/cuda/device_cuda_module.c:1864: gpu_device->super.name, task->data[i].data_in, 
-mca/device/cuda/device_cuda_module.c:1865: task->data[i].data_in->super.super.obj_reference_count, src_device->super.name, -mca/device/cuda/device_cuda_module.c:1871: gpu_device->super.name, task->data[i].data_in, -mca/device/cuda/device_cuda_module.c:1875: parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); -mca/device/cuda/device_cuda_module.c:1876: src_device->data_avail_epoch++; -mca/device/cuda/device_cuda_module.c:1880: rc = parsec_atomic_cas_int32(&src_device->mutex, -1, 0); (void)rc; -mca/device/cuda/device_cuda_module.c:1886: gpu_device->super.name, src_device->super.name, task->data[i].data_in, -mca/device/cuda/device_cuda_module.c:1897: gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task), -mca/device/cuda/device_cuda_module.c:1925: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, -mca/device/cuda/device_cuda_module.c:1935: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, gtask, gtask->ec); -mca/device/cuda/device_cuda_module.c:1936: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); -mca/device/cuda/device_cuda_module.c:1991: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2059: gpu_device->super.name, (void*)task); -mca/device/cuda/device_cuda_module.c:2064: gpu_device->super.name, (void*)task->ec); -mca/device/cuda/device_cuda_module.c:2081: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2112: if( gpu_task->last_data_check_epoch == gpu_device->data_avail_epoch ) -mca/device/cuda/device_cuda_module.c:2117: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2125: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2130: if( NULL != gpu_task->ec->data[0].data_in->original->device_copies[gpu_device->super.device_index] && -mca/device/cuda/device_cuda_module.c:2131: gpu_task->ec->data[0].data_in->original->owner_device == gpu_device->super.device_index ) { 
-mca/device/cuda/device_cuda_module.c:2135: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2137: gpu_device->super.device_index, -mca/device/cuda/device_cuda_module.c:2138: gpu_task->ec->data[0].data_in->original->device_copies[gpu_device->super.device_index]); -mca/device/cuda/device_cuda_module.c:2147: gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch; -mca/device/cuda/device_cuda_module.c:2169: gpu_device->super.name, flow->name, -mca/device/cuda/device_cuda_module.c:2180: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2231: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2263: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2274: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); -mca/device/cuda/device_cuda_module.c:2277: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); -mca/device/cuda/device_cuda_module.c:2284: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name, gpu_copy->readers); -mca/device/cuda/device_cuda_module.c:2287: assert( gpu_copy == parsec_data_get_copy(gpu_copy->original, gpu_device->super.device_index) ); -mca/device/cuda/device_cuda_module.c:2291: gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); -mca/device/cuda/device_cuda_module.c:2294: gpu_device->super.required_data_out += nb_elts; -mca/device/cuda/device_cuda_module.c:2304: gpu_device->super.name, flow->name, original->key, gpu_copy, gpu_copy->super.super.obj_reference_count, -mca/device/cuda/device_cuda_module.c:2340: gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ -mca/device/cuda/device_cuda_module.c:2351: gpu_device->data_avail_epoch++; -mca/device/cuda/device_cuda_module.c:2355: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2356: parsec_task_snprintf(tmp, MAX_TASK_STRLEN, this_task), return_code, 
gpu_device->data_avail_epoch ); -mca/device/cuda/device_cuda_module.c:2377: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2426: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2444: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); -mca/device/cuda/device_cuda_module.c:2449: parsec_list_push_back(&gpu_device->gpu_mem_owned_lru, (parsec_list_item_t*)gpu_copy); -mca/device/cuda/device_cuda_module.c:2485: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2505: parsec_data_copy_detach(original, gpu_copy, gpu_device->super.device_index); -mca/device/cuda/device_cuda_module.c:2515: parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); -mca/device/cuda/device_cuda_module.c:2524: gpu_device->data_avail_epoch++; -mca/device/cuda/device_cuda_module.c:2574: rc = gpu_device->mutex; -mca/device/cuda/device_cuda_module.c:2577: if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc+1 ) ) { -mca/device/cuda/device_cuda_module.c:2587: parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); -mca/device/cuda/device_cuda_module.c:2591: gpu_device->super.name, __FILE__, __LINE__); -mca/device/cuda/device_cuda_module.c:2599: status = cudaSetDevice( cuda_device->cuda_index ); -mca/device/cuda/device_cuda_module.c:2607: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2612: gpu_device->exec_stream[0], -mca/device/cuda/device_cuda_module.c:2623: PARSEC_PUSH_TASK(gpu_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*)progress_task); -mca/device/cuda/device_cuda_module.c:2634: exec_stream = (exec_stream + 1) % (gpu_device->max_exec_streams - 2); /* Choose an exec_stream */ -mca/device/cuda/device_cuda_module.c:2636: PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tExecute %s priority %d", gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2641: gpu_device->exec_stream[2+exec_stream], -mca/device/cuda/device_cuda_module.c:2667: 
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tRetrieve data (if any) for %s priority %d", gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2673: gpu_device->exec_stream[1], -mca/device/cuda/device_cuda_module.c:2697: gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(gpu_device->pending) ); -mca/device/cuda/device_cuda_module.c:2700: gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; /* force at least one tour */ -mca/device/cuda/device_cuda_module.c:2701: PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tGet from shared queue %s priority %d", gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2711: gpu_device->super.name, gpu_device->mutex, pop_null); -mca/device/cuda/device_cuda_module.c:2719: gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2735: gpu_device->super.executed_tasks++; -mca/device/cuda/device_cuda_module.c:2737: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; -mca/device/cuda/device_cuda_module.c:2738: parsec_device_load[gpu_device->super.device_index] -= parsec_device_sweight[gpu_device->super.device_index]; -mca/device/cuda/device_cuda_module.c:2739: PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream,"GPU[%s]: gpu_task %p freed at %s:%d", gpu_device->super.name, -mca/device/cuda/device_cuda_module.c:2742: rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); -mca/device/cuda/device_cuda_module.c:2750: gpu_device->super.name, __FILE__, __LINE__); -parsec_internal.h:629: * @brief Device-level info -parsec_internal.h:645: * @details infos stored under this handle exist per device-stream: -data.c:97: if ( !(device->type & PARSEC_DEV_CUDA) ){ diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index ef4afb2d5..77474b2f6 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -6841,6 +6841,7 @@ static void 
jdf_generate_code_hook_cuda(const jdf_t *jdf, coutput(" parsec_device_load[dev_index] += gpu_task->load;\n" " gpu_task->migrate_status = 0;\n" + " gpu_task->data_retained = 0;\n" "\n" " return parsec_cuda_kernel_scheduler( es, gpu_task, dev_index );\n" "}\n\n"); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1653e069d..e267f5890 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -350,19 +350,19 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 execution_level = 1; - //if( migrated_gpu_task == NULL) - //{ - // for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) - // { - // migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 - // if(migrated_gpu_task != NULL) - // { - // execution_level = 2; - // stream_index = 2 + j; - // break; - // } - // } - //} + if( migrated_gpu_task == NULL) + { + for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 + if(migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } + } + } } @@ -487,6 +487,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_in = task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; PARSEC_OBJ_RETAIN(task->data[i].data_in); + gpu_task->data_retained |= 1 << i; /** * @brief If the task only WRITE access, then we have to increment the @@ -510,7 +511,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); 
parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) @@ -521,6 +522,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); } + + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "Migrate: data %p of original %p migrated from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); assert(task->data[i].data_in->original == task->data[i].data_out->original); assert( task->data[i].data_in->original != NULL); @@ -534,6 +540,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) assert(task->data[i].data_out->version == task->data[i].data_in->version); assert(task->data[i].data_in->device_index == dealer_device->super.device_index); + assert(task->data[i].data_in->device_private != NULL); parsec_atomic_unlock( &original->lock ); @@ -560,12 +567,12 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); PARSEC_OBJ_RETAIN(task->data[i].data_in); + gpu_task->data_retained |= 1 << i; PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate data: data %p of original %p migrated from device %d to %d for task %s", + "Migrate: data %p of original %p migrated from device %d 
to %d (stage_in: %d)", task->data[i].data_out, original, dealer_device->super.device_index, - starving_device->super.device_index, - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)gpu_task))); + starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index f302bcca9..8781d0053 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -790,8 +790,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de gpu_device->super.device_index, NULL, 0); } #endif - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p at %s:%d", - gpu_copy, gpu_copy->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p [dev_prvt %p] attached to original %p at %s:%d", + gpu_copy, gpu_copy->device_private, gpu_copy->original, __FILE__, __LINE__); zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); #endif gpu_copy->device_private = NULL; @@ -800,9 +800,9 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de * before we get here (aka below parsec_fini), the destructor of the data * collection must have been called, releasing all the copies. 
*/ - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - gpu_copy, gpu_copy->original, __FILE__, __LINE__); - PARSEC_OBJ_RELEASE(gpu_copy); //assert(NULL == gpu_copy); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p [dev_prvt %p] attached to original %p at %s:%d", + gpu_copy, gpu_copy->device_private, gpu_copy->original, __FILE__, __LINE__); + PARSEC_OBJ_RELEASE(gpu_copy); assert(NULL == gpu_copy); } } @@ -915,15 +915,15 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, /* There is already a copy on the device */ if( NULL != gpu_elem ) { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s]:%s: Flow %s:%i has a copy on the device %p%s", + "GPU[%s]:%s: Flow %s:%i has a copy %p (original %p) on the device with status %s", gpu_device->super.name, task_name, - flow->name, i, gpu_elem, + flow->name, i, gpu_elem, master, gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_UNDER_TRANSFER ? 
" [in transfer]" : ""); if ( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_UNDER_TRANSFER ) { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s]:%s: Copy %p [ref_count %d] is still in transfer, descheduling...", + "GPU[%s]:%s: Copy %p [ref_count %d, original %p] is still in transfer, descheduling...", gpu_device->super.name, task_name, - gpu_elem, gpu_elem->super.super.obj_reference_count); + gpu_elem, master, gpu_elem->super.super.obj_reference_count); SET_HIGHEST_PRIORITY(gpu_task->ec, parsec_execution_context_priority_comparator); parsec_atomic_unlock(&master->lock); return PARSEC_HOOK_RETURN_AGAIN; @@ -1516,6 +1516,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, #endif /* Push data into the GPU from the source device */ + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "Stage in on cuda device %d from copy %p (device_private %p original %p) to copy %p (device_private %p)", + cuda_device->cuda_index, in_elem, in_elem->device_private, in_elem->original, + gpu_elem, gpu_elem->device_private); + if(PARSEC_SUCCESS != (gpu_task->stage_in ? 
gpu_task->stage_in(gpu_task, (1U << flow->flow_index), gpu_stream): PARSEC_SUCCESS)) { parsec_warning( "%s:%d %s", __FILE__, __LINE__, "gpu_task->stage_in"); @@ -1579,10 +1584,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); - //if( gpu_task->migrate_status > TASK_NOT_MIGRATED) - // gurantee_ownership_transfer(gpu_task, original, flow->flow_index, - // in_elem, gpu_elem, - // gpu_device->super.device_index, type); + if(gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + if( gpu_task->data_retained & (1 << flow->flow_index) ) + PARSEC_OBJ_RELEASE(gpu_task->ec->data[ flow->flow_index ].data_in); + } PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\t\tNO Move %s for data copy %p [ref_count %d, key %x] of %d bytes (host v:%d / device v:%d)", @@ -1964,9 +1970,9 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, continue; } PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s]:\tparsec_gpu_callback_complete_push, PUSH of %s: task->data[%d].data_out = %p [ref_count = %d], and push_task is %s, %s because transfer_status is %d", + "GPU[%s]:\tparsec_gpu_callback_complete_push, PUSH of %s: task->data[%d].data_out = %p [ref_count = %d, dev_prvt %p], and push_task is %s, %s because transfer_status is %d", gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task), - i, task->data[i].data_out, task->data[i].data_out->super.super.obj_reference_count, + i, task->data[i].data_out, task->data[i].data_out->device_private, task->data[i].data_out->super.super.obj_reference_count, (NULL != task->data[i].data_out->push_task) ? parsec_task_snprintf(task_str2, MAX_TASK_STRLEN, task->data[i].data_out->push_task) : "(null)", (task->data[i].data_out->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER) ? 
"all is good" : "Assertion", task->data[i].data_out->data_transfer_status); diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 1a6790d46..36666b15e 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -84,7 +84,8 @@ struct parsec_gpu_task_s { parsec_complete_stage_function_t complete_stage; parsec_stage_in_function_t *stage_in; parsec_stage_out_function_t *stage_out; - int migrate_status; + int migrate_status; + int32_t data_retained; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From 6468cc2b423f8334a2978891b05503bfd20b84ad Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 9 Jun 2022 07:50:42 +1000 Subject: [PATCH 083/215] Debug statements added --- parsec/mca/device/cuda/device_cuda_migrate.c | 79 +++++++++------- parsec/mca/device/cuda/device_cuda_module.c | 97 +++++++++++++++----- 2 files changed, 120 insertions(+), 56 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e267f5890..4e78a9954 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -486,61 +486,70 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t */ task->data[i].data_in = task->data[i].data_out; task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - PARSEC_OBJ_RETAIN(task->data[i].data_in); - gpu_task->data_retained |= 1 << i; - /** - * @brief If the task only WRITE access, then we have to increment the - * reader of the data_in, so that it does not go into negative value - * when we call complete_stage( parsec_gpu_callback_complete_push() ) - * after the second stage in of the task is completed (on the starving device). - * - * If the task only as READ access it is already in the gpu_mem_owned_lru of - * the dealer device. If it has WRITE and READ-WRITE access we move the data - * to gpu_mem_owned_lru. 
- */ if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert( task->data[i].data_in->readers > 0 ); + + PARSEC_OBJ_RETAIN(task->data[i].data_in); + gpu_task->data_retained |= 1 << i; + + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //assert( task->data[i].data_in->super.super.obj_reference_count == 1); + if( task->data[i].data_in->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + else + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + } - if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_in->readers >= 0); - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); + assert(task->data[i].data_in->readers > 0); + + PARSEC_OBJ_RETAIN(task->data[i].data_in); + gpu_task->data_retained |= 1 << i; + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + //assert( task->data[i].data_in->super.super.obj_reference_count == 1); + if( task->data[i].data_in->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + else + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + } - if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - 
assert(task->data[i].data_in->readers > 0); + assert(task->data[i].data_in->readers == 0); + + PARSEC_OBJ_RETAIN(task->data[i].data_in); + gpu_task->data_retained |= 1 << i; + + PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + assert( task->data[i].data_in->super.super.obj_reference_count == 1); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); - } + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate: data %p of original %p migrated from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + - assert(task->data[i].data_in->original == task->data[i].data_out->original); - assert( task->data[i].data_in->original != NULL); - if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) ) - assert( task->data[i].data_out->version == task->data[i].data_in->version); assert(task->data[i].data_out != NULL); assert(original->device_copies[dealer_device->super.device_index]!= NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); - assert(task->data[i].data_in->readers >= 0); - assert( task->data[i].data_out->version == task->data[i].data_in->version); - if(task->data[i].data_out->original->owner_device != dealer_device->super.device_index) - assert(task->data[i].data_out->version == task->data[i].data_in->version); assert(task->data[i].data_in->device_index == 
dealer_device->super.device_index); assert(task->data[i].data_in->device_private != NULL); + assert( task->data[i].data_in->device_index == dealer_device->super.device_index ); parsec_atomic_unlock( &original->lock ); @@ -569,10 +578,14 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_OBJ_RETAIN(task->data[i].data_in); gpu_task->data_retained |= 1 << i; + assert( task->data[i].data_in->device_index == dealer_device->super.device_index ); + + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate: data %p of original %p migrated from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 8781d0053..f4b3b75fd 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -692,8 +692,10 @@ parsec_cuda_memory_reserve( parsec_device_cuda_module_t* cuda_device, mem_elem_per_gpu++; PARSEC_OBJ_RETAIN(gpu_elem); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s] Retain and insert CUDA copy %p [ref_count %d] in LRU", - gpu_device->super.name, gpu_elem, gpu_elem->super.obj_reference_count); + "GPU[%s] Retain and insert CUDA copy %p attached to original %p [readers %d, ref_count %d] in LRU", + gpu_device->super.name, gpu_elem, gpu_elem->original, + gpu_elem->readers, 
gpu_elem->super.super.obj_reference_count); + // assert( gpu_elem->super.super.obj_reference_count == 1); parsec_list_push_back( &gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem ); cudaMemGetInfo( &free_mem, &total_mem ); } @@ -758,8 +760,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de parsec_data_t* original = gpu_copy->original; PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream, - "GPU[%s] Release CUDA copy %p (device_ptr %p) [ref_count %d: must be 1], attached to %p, in map %p", - gpu_device->super.name, gpu_copy, gpu_copy->device_private, gpu_copy->super.super + "GPU[%s] (gpu_copy->device_index: %d should be same as %d ) Release CUDA copy %p (device_ptr %p) [ref_count %d: must be 1], attached to %p, in map %p", + gpu_device->super.name, gpu_copy->device_index, cuda_device->super.super.device_index, gpu_copy, gpu_copy->device_private, gpu_copy->super.super .obj_reference_count, original, (NULL != original ? original->dc : NULL)); assert( gpu_copy->device_index == cuda_device->super.super.device_index ); @@ -800,9 +802,25 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de * before we get here (aka below parsec_fini), the destructor of the data * collection must have been called, releasing all the copies. 
*/ - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p [dev_prvt %p] attached to original %p at %s:%d", - gpu_copy, gpu_copy->device_private, gpu_copy->original, __FILE__, __LINE__); - PARSEC_OBJ_RELEASE(gpu_copy); assert(NULL == gpu_copy); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p [dev_prvt %p readers %d ref_count %d] attached to original %p at %s:%d", + gpu_copy, gpu_copy->device_private, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, gpu_copy->original, __FILE__, __LINE__); + PARSEC_OBJ_RELEASE(gpu_copy); + + int i, ref_count; + if( gpu_copy != NULL) + { + parsec_warning("parsec_cuda_memory_release_list: Release copy %p original %d readers %d ref_count %d. The copy should have been NULL by this point!! (%s:%d)", + gpu_copy, gpu_copy->original, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, __FILE__, __LINE__); + + //ref_count = gpu_copy->super.super.obj_reference_count; + //for( i = 0; i < ref_count; i++) + // PARSEC_OBJ_RELEASE(gpu_copy); + + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "parsec_cuda_memory_release_list: key_base %d key %d", + gpu_copy->original->dc->key_base, gpu_copy->original->key); + } + + assert(NULL == gpu_copy); } } @@ -822,7 +840,7 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - //assert(0); + assert(0); } #endif return PARSEC_SUCCESS; @@ -970,11 +988,13 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, temp_loc[j], temp_loc[j]->super.super.obj_reference_count); /* push them at the head to reach them again at the next iteration */ + //assert( temp_loc[j]->super.super.obj_reference_count == 1); parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); } #if 
!defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - gpu_elem, gpu_elem->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d ]at %s:%d", + gpu_elem, gpu_elem->original, gpu_elem->readers, gpu_elem->super.super.obj_reference_count, + __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_elem); #endif parsec_atomic_unlock(&master->lock); @@ -1016,6 +1036,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); + //assert( lru_gpu_elem->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1043,6 +1064,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, * might be adding/removing other elements to the list, so we * need to protect all accesses to gpu_mem_lru with the locked version */ assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); + //assert( lru_gpu_elem->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1136,8 +1158,10 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:%s: Release LRU-retrieved CUDA copy %p [ref_count %d: must be 1]", gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->super.super.obj_reference_count); - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - lru_gpu_elem, lru_gpu_elem->original, __FILE__, __LINE__); 
+ PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + lru_gpu_elem, lru_gpu_elem->original, + lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, + __FILE__, __LINE__); PARSEC_OBJ_RELEASE(lru_gpu_elem); assert( NULL == lru_gpu_elem ); goto malloc_data; @@ -1182,6 +1206,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, gpu_elem, gpu_elem->super.super.obj_reference_count); assert(0 != (gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); + //assert( gpu_elem->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); parsec_atomic_unlock(&master->lock); } @@ -1385,7 +1410,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, undo_readers_inc_if_no_transfer = 1; /* We swap data_in with candidate, so we update the reference counters */ PARSEC_OBJ_RETAIN(candidate); - release_after_data_in_is_attached = task_data->data_in; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy", + gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); + task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1537,8 +1565,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + 
release_after_data_in_is_attached, release_after_data_in_is_attached->original, + release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); } assert(0); @@ -1568,8 +1598,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, + release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); } return 1; @@ -1587,7 +1619,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if(gpu_task->migrate_status > TASK_NOT_MIGRATED) { if( gpu_task->data_retained & (1 << flow->flow_index) ) + { + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", + gpu_task->ec->data[ flow->flow_index ].data_in, gpu_task->ec->data[ flow->flow_index ].data_in->original, + gpu_task->ec->data[ flow->flow_index ].data_in->readers, gpu_task->ec->data[ flow->flow_index ].data_in->super.super.obj_reference_count, + __FILE__, __LINE__); + PARSEC_OBJ_RELEASE(gpu_task->ec->data[ flow->flow_index ].data_in); + } } PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, @@ -1599,8 +1638,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != 
release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, + release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); } /* TODO: data keeps the same coherence flags as before */ @@ -1729,11 +1770,12 @@ parsec_cuda_data_advise(parsec_device_module_t *dev, parsec_data_t *data, int ad gpu_task->flow_nb_elts[0] = data->device_copies[ data->owner_device ]->original->nb_elts; gpu_task->stage_in = parsec_default_cuda_stage_in; gpu_task->stage_out = parsec_default_cuda_stage_out; - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Retain data copy %p [ref_count %d] at %s:%d", - data->device_copies[ data->owner_device ], + PARSEC_OBJ_RETAIN(data->device_copies[ data->owner_device ]); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Retain data copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + data->device_copies[ data->owner_device ], data->device_copies[ data->owner_device ]->original, + data->device_copies[ data->owner_device ]->readers, data->device_copies[ data->owner_device ]->super.super.obj_reference_count, __FILE__, __LINE__); - PARSEC_OBJ_RETAIN(data->device_copies[ data->owner_device ]); gpu_task->ec->data[0].data_in = data->device_copies[ data->owner_device ]; gpu_task->ec->data[0].data_out = NULL; gpu_task->ec->data[0].source_repo_entry = NULL; @@ -1949,6 +1991,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, task->data[i].data_in->super.super.obj_reference_count); 
parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); + //assert( task->data[i].data_in->super.super.obj_reference_count == 1); parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); src_device->data_avail_epoch++; } @@ -2011,6 +2054,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream, "GPU[%s]:\tMake copy %p [ref_count %d] available after prefetch from gpu_task %p, ec %p", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, gtask, gtask->ec); + //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } return parsec_cuda_destroy_task(gpu_device, gpu_task); @@ -2346,6 +2390,9 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, original = gpu_copy->original; nb_elts = gpu_task->flow_nb_elts[i]; + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, + "GPU[%s]:\tTry kernel_pop for copy %p attached to original %p [readers %d, ref_count %d] available on flow %s", + gpu_device->super.name, gpu_copy, original, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, flow->name); assert( this_task->data[i].data_in == NULL || original == this_task->data[i].data_in->original ); if( !(flow->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { @@ -2372,7 +2419,8 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, "GPU[%s]:\tMake read-only copy %p [ref_count %d] available on flow %s", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); - PARSEC_LIST_ITEM_SINGLETON(gpu_copy); /* TODO: singleton instead? */ + PARSEC_LIST_ITEM_SINGLETON(gpu_copy); /* TODO: singleton instead? 
*/ + //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); update_data_epoch = 1; parsec_atomic_unlock(&original->lock); @@ -2542,11 +2590,13 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, gpu_copy, gpu_copy->super.super.obj_reference_count, __func__); parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); + //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } else { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "CUDA copy %p [ref_count %d] moved to the owned LRU in %s", gpu_copy, gpu_copy->super.super.obj_reference_count, __func__); + //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_owned_lru, (parsec_list_item_t*)gpu_copy); } } @@ -2613,6 +2663,7 @@ parsec_cuda_kernel_cleanout( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; if( 0 != (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) { + //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } parsec_atomic_unlock(&original->lock); From b7c5fa88adec5e2289aef48056eb4d882f0766c0 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 10 Jun 2022 05:38:26 +1000 Subject: [PATCH 084/215] New workflow based on candidate selection implemented. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 123 +++++++------------ parsec/mca/device/cuda/device_cuda_module.c | 91 +++++++++++--- parsec/mca/device/device_gpu.h | 1 + 3 files changed, 117 insertions(+), 98 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 4e78a9954..b723eb5e6 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -325,7 +325,7 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) { int starving_device_index = -1, dealer_device_index = 0; - int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0; + int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0, k = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t* starving_device = NULL; migrated_task_t *mig_task = NULL; @@ -425,6 +425,8 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; + //memset(migrated_gpu_task->posssible_candidate, -1, sizeof(int32_t)); + for( k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->posssible_candidate[k] = -1; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; @@ -478,63 +480,49 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t* original = task->data[i].data_out->original; parsec_atomic_lock( &original->lock ); - /** - * @brief If the task is stage in the data is already available in the - * dealer GPU. 
So we can set data_in = data_out, in order to make sure - * that the source data for the second stage in is always selected as the - * data in the delaer GPU. - */ - task->data[i].data_in = task->data[i].data_out; - task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + //#if 0 if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert( task->data[i].data_in->readers > 0 ); - - PARSEC_OBJ_RETAIN(task->data[i].data_in); - gpu_task->data_retained |= 1 << i; + assert( task->data[i].data_out->readers > 0 ); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //assert( task->data[i].data_in->super.super.obj_reference_count == 1); - if( task->data[i].data_in->version > original->device_copies[0]->version) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + //assert( task->data[i].data_out->super.super.obj_reference_count == 1); + if( task->data[i].data_out->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); } if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_in->readers > 0); - - PARSEC_OBJ_RETAIN(task->data[i].data_in); - gpu_task->data_retained |= 1 << i; + 
assert(task->data[i].data_out->readers > 0); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //assert( task->data[i].data_in->super.super.obj_reference_count == 1); - if( task->data[i].data_in->version > original->device_copies[0]->version) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + //assert( task->data[i].data_out->super.super.obj_reference_count == 1); + if( task->data[i].data_out->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); } if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_in->readers == 0); + assert(task->data[i].data_out->readers == 0); - PARSEC_OBJ_RETAIN(task->data[i].data_in); - gpu_task->data_retained |= 1 << i; - - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - assert( task->data[i].data_in->super.super.obj_reference_count == 1); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + assert( task->data[i].data_out->super.super.obj_reference_count == 1); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); } + //#endif 
PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, @@ -547,19 +535,14 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out != NULL); assert(original->device_copies[dealer_device->super.device_index]!= NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); - assert(task->data[i].data_in->device_index == dealer_device->super.device_index); - assert(task->data[i].data_in->device_private != NULL); - assert( task->data[i].data_in->device_index == dealer_device->super.device_index ); + assert(task->data[i].data_out->device_index == dealer_device->super.device_index); + assert(task->data[i].data_out->device_private != NULL); + assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); parsec_atomic_unlock( &original->lock ); } - /** - * Data is not yet staged in the dealer device, but some of the data we need maybe - * already in the delaer device and the dealer device may have the latest data. - * In that case we set data_in = data_out. 
- */ else { assert( task->data[i].data_in != NULL); @@ -572,24 +555,26 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[original->owner_device] != NULL); parsec_atomic_lock( &original->lock ); - task->data[i].data_in = task->data[i].data_out; - task->data[i].data_in->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_in ); - PARSEC_OBJ_RETAIN(task->data[i].data_in); - gpu_task->data_retained |= 1 << i; + //task->data[i].data_out = task->data[i].data_out; + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_out ); + + //PARSEC_OBJ_RETAIN(task->data[i].data_out); + //gpu_task->data_retained |= 1 << i; + gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - assert( task->data[i].data_in->device_index == dealer_device->super.device_index ); + assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", + "Migrate: data %p attached to original %p [readers %d, ref_count %d] possiible candidate from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); + //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + //parsec_list_push_back(&dealer_device->gpu_mem_lru, 
(parsec_list_item_t*)task->data[i].data_out); parsec_atomic_unlock( &original->lock ); } @@ -628,31 +613,5 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo return 0; } -int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data, int flow_index, - parsec_data_copy_t* src_copy, parsec_data_copy_t* dst_copy, - uint8_t stage_in_device, uint8_t access_mode) -{ - assert( dst_copy != NULL ); - parsec_task_t *task = gpu_task->ec; - - /** - * @brief we are doing a D2D copy from the dealer node to the starving node. - */ - if( task->data[flow_index].data_in == src_copy && src_copy->device_index > 1) - { - if( PARSEC_FLOW_ACCESS_READ & access_mode ) - { - if( data->owner_device == src_copy->device_index) - data->owner_device = (uint8_t)stage_in_device; - } - - if( PARSEC_FLOW_ACCESS_WRITE & access_mode ) - { - data->owner_device = (uint8_t)stage_in_device; - //parsec_data_copy_detach(data, src_copy, src_copy->device_index); - } - - } -} diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index f4b3b75fd..5c8de6caa 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -696,6 +696,7 @@ parsec_cuda_memory_reserve( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, gpu_elem, gpu_elem->original, gpu_elem->readers, gpu_elem->super.super.obj_reference_count); // assert( gpu_elem->super.super.obj_reference_count == 1); + assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back( &gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem ); cudaMemGetInfo( &free_mem, &total_mem ); } @@ -989,6 +990,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, temp_loc[j], temp_loc[j]->super.super.obj_reference_count); /* push them at the head to reach them again at the next iteration */ //assert( temp_loc[j]->super.super.obj_reference_count == 1); + 
assert(temp_loc[j]->device_index == gpu_device->super.device_index); parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); } #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) @@ -1037,6 +1039,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); //assert( lru_gpu_elem->super.super.obj_reference_count == 1); + assert(lru_gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1065,6 +1068,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, * need to protect all accesses to gpu_mem_lru with the locked version */ assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); //assert( lru_gpu_elem->super.super.obj_reference_count == 1); + assert(lru_gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1207,6 +1211,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count); assert(0 != (gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); //assert( gpu_elem->super.super.obj_reference_count == 1); + assert(gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); parsec_atomic_unlock(&master->lock); } @@ -1376,7 +1381,48 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* If gpu_elem is not invalid, then it is already there and the right version, * and we're not going to transfer from another source, skip the selection */ if( 
gpu_elem->coherency_state != PARSEC_DATA_COHERENCY_INVALID ) - goto src_selected; + { + if( (gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) + { + + int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; + parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; + parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(candidate); + + goto src_selected; + } + } + + + if( (gpu_task->migrate_status > TASK_NOT_MIGRATED) + && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) + { + int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; + parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; + parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); + + if( PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ) + { + if(gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + undo_readers_inc_if_no_transfer = 1; + + PARSEC_OBJ_RETAIN(candidate); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best possible candidate to to Device to Device copy", + gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); + + release_after_data_in_is_attached = task_data->data_in; + task_data->data_in = candidate; + in_elem = candidate; + in_elem_dev = target; + + goto src_selected; + + } + } for(int t = 1; t < (int)parsec_nb_devices; t++) { parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(t); @@ -1384,7 +1430,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, 
if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { parsec_data_copy_t *candidate = original->device_copies[t]; - if( (NULL != candidate && candidate->version == in_elem->version) ) + if( (NULL != candidate && candidate->version == in_elem->version )) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is a potential alternative source for in_elem %p on data %p", @@ -1414,6 +1460,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); + release_after_data_in_is_attached = task_data->data_in; task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1467,8 +1514,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * GPU we have to compensate for the reader increment made during * the first stage_in */ - if(gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); + //if(gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + // PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); } else { /* Update the transferred required_data_in size */ @@ -1616,18 +1663,18 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); - if(gpu_task->migrate_status > TASK_NOT_MIGRATED) - { - if( gpu_task->data_retained & (1 << flow->flow_index) ) - { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", - gpu_task->ec->data[ flow->flow_index ].data_in, gpu_task->ec->data[ flow->flow_index ].data_in->original, - gpu_task->ec->data[ flow->flow_index ].data_in->readers, gpu_task->ec->data[ 
flow->flow_index ].data_in->super.super.obj_reference_count, - __FILE__, __LINE__); - - PARSEC_OBJ_RELEASE(gpu_task->ec->data[ flow->flow_index ].data_in); - } - } + //if(gpu_task->migrate_status > TASK_NOT_MIGRATED) + //{ + // if( gpu_task->data_retained & (1 << flow->flow_index) ) + // { + // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", + // gpu_task->ec->data[ flow->flow_index ].data_in, gpu_task->ec->data[ flow->flow_index ].data_in->original, + // gpu_task->ec->data[ flow->flow_index ].data_in->readers, gpu_task->ec->data[ flow->flow_index ].data_in->super.super.obj_reference_count, + // __FILE__, __LINE__); +// + // PARSEC_OBJ_RELEASE(gpu_task->ec->data[ flow->flow_index ].data_in); + // } + //} PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\t\tNO Move %s for data copy %p [ref_count %d, key %x] of %d bytes (host v:%d / device v:%d)", @@ -1992,6 +2039,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); //assert( task->data[i].data_in->super.super.obj_reference_count == 1); + assert(task->data[i].data_in->device_index == src_device->super.device_index); parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); src_device->data_avail_epoch++; } @@ -2055,6 +2103,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, "GPU[%s]:\tMake copy %p [ref_count %d] available after prefetch from gpu_task %p, ec %p", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, gtask, gtask->ec); //assert( gpu_copy->super.super.obj_reference_count == 1); + assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } return parsec_cuda_destroy_task(gpu_device, gpu_task); @@ 
-2421,6 +2470,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); /* TODO: singleton instead? */ //assert( gpu_copy->super.super.obj_reference_count == 1); + assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); update_data_epoch = 1; parsec_atomic_unlock(&original->lock); @@ -2528,12 +2578,19 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, #endif for( i = 0; i < this_task->task_class->nb_flows; i++ ) { + /* Make sure data_in is not NULL */ if( NULL == this_task->data[i].data_in ) continue; /* Don't bother if there is no real data (aka. CTL or no output) */ if(NULL == this_task->data[i].data_out) continue; + //if(0 != this_task->data[i].data_in->device_index) + //{ + // assert( this_task->data[i].data_in->original->device_copies[0] != NULL); + // this_task->data[i].data_in = this_task->data[i].data_in->original->device_copies[0]; + //} + if( !(gpu_task->flow[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { /* Warning data_out for read only flows has been overwritten in pop */ @@ -2591,6 +2648,7 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); //assert( gpu_copy->super.super.obj_reference_count == 1); + assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } else { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, @@ -2664,6 +2722,7 @@ parsec_cuda_kernel_cleanout( parsec_device_gpu_module_t *gpu_device, this_task->data[i].data_out = cpu_copy; if( 0 != (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) { //assert( gpu_copy->super.super.obj_reference_count == 1); + assert(gpu_copy->device_index == gpu_device->super.device_index); 
parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } parsec_atomic_unlock(&original->lock); diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 36666b15e..3ac237da7 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -86,6 +86,7 @@ struct parsec_gpu_task_s { parsec_stage_out_function_t *stage_out; int migrate_status; int32_t data_retained; + int32_t posssible_candidate[MAX_PARAM_COUNT]; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From fd3b306bc14e2d84705e29af5fe8277946f2272c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 10 Jun 2022 17:08:43 -0400 Subject: [PATCH 085/215] additional debug statements --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_module.c | 62 ++++++++------------ parsec/mca/device/device_gpu.h | 2 +- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b723eb5e6..c05be0677 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -369,7 +369,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu if(migrated_gpu_task != NULL) { assert(migrated_gpu_task->ec != NULL); - parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); + //parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); /** * @brief if the task is a not a computational kerenel or if it is a task that has diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 5c8de6caa..b9df483e0 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -692,8 +692,8 @@ parsec_cuda_memory_reserve( parsec_device_cuda_module_t* cuda_device, 
mem_elem_per_gpu++; PARSEC_OBJ_RETAIN(gpu_elem); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s] Retain and insert CUDA copy %p attached to original %p [readers %d, ref_count %d] in LRU", - gpu_device->super.name, gpu_elem, gpu_elem->original, + "GPU[%s] Retain and insert CUDA copy %p attached to original %p on device_index %d [readers %d, ref_count %d] in LRU", + gpu_device->super.name, gpu_elem, gpu_elem->original, gpu_elem->device_index, gpu_elem->readers, gpu_elem->super.super.obj_reference_count); // assert( gpu_elem->super.super.obj_reference_count == 1); assert(gpu_copy->device_index == gpu_device->super.device_index); @@ -793,8 +793,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de gpu_device->super.device_index, NULL, 0); } #endif - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p [dev_prvt %p] attached to original %p at %s:%d", - gpu_copy, gpu_copy->device_private, gpu_copy->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p [dev_prvt %p] attached to original %p on device_index %d at %s:%d", + gpu_copy, gpu_copy->device_private, gpu_copy->original, gpu_copy->device_index ,__FILE__, __LINE__); zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); #endif gpu_copy->device_private = NULL; @@ -803,8 +803,8 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de * before we get here (aka below parsec_fini), the destructor of the data * collection must have been called, releasing all the copies. 
*/ - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p [dev_prvt %p readers %d ref_count %d] attached to original %p at %s:%d", - gpu_copy, gpu_copy->device_private, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, gpu_copy->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p [dev_prvt %p readers %d ref_count %d] attached to original %p on device_index %d at %s:%d", + gpu_copy, gpu_copy->device_private, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, gpu_copy->original, gpu_copy->device_index, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_copy); int i, ref_count; @@ -994,8 +994,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); } #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d ]at %s:%d", - gpu_elem, gpu_elem->original, gpu_elem->readers, gpu_elem->super.super.obj_reference_count, + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p on device_index %d [readers %d, ref_count %d ]at %s:%d", + gpu_elem, gpu_elem->original, gpu_elem->device_index,gpu_elem->readers, gpu_elem->super.super.obj_reference_count, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_elem); #endif @@ -1134,9 +1134,9 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) /* Let's free this space, and try again to malloc some space */ PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream, - "GPU[%s] Release CUDA copy %p (device_ptr %p) [ref_count %d: must be 1], attached to %p", + "GPU[%s] Release CUDA copy %p (device_ptr %p) [ref_count %d: must be 1], attached to %p on device_index %d", gpu_device->super.name, - lru_gpu_elem, lru_gpu_elem->device_private, 
lru_gpu_elem->super.super.obj_reference_count, + lru_gpu_elem, lru_gpu_elem->device_index, lru_gpu_elem->device_private, lru_gpu_elem->super.super.obj_reference_count, oldmaster); #if defined(PARSEC_PROF_TRACE) if((parsec_gpu_trackable_events & PARSEC_PROFILE_GPU_TRACK_MEM_USE) && @@ -1153,8 +1153,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, } #endif assert( 0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p at %s:%d", - lru_gpu_elem, lru_gpu_elem->original, __FILE__, __LINE__); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p on device_index %d at %s:%d", + lru_gpu_elem, lru_gpu_elem->original, lru_gpu_elem->device_index, __FILE__, __LINE__); zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); lru_gpu_elem->device_private = NULL; data_avail_epoch++; @@ -1162,8 +1162,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:%s: Release LRU-retrieved CUDA copy %p [ref_count %d: must be 1]", gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->super.super.obj_reference_count); - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", - lru_gpu_elem, lru_gpu_elem->original, + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p on device_index %d [readers %d, ref_count %d] at %s:%d", + lru_gpu_elem, lru_gpu_elem->original, lru_gpu_elem->device_index, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(lru_gpu_elem); @@ -1612,8 +1612,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, 
"Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p on device_index %d [readers %d, ref_count %d] at %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); @@ -1645,8 +1645,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at on device_index %d [readers %d, ref_count %d] %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); @@ -1663,18 +1663,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); - //if(gpu_task->migrate_status > TASK_NOT_MIGRATED) - //{ - // if( gpu_task->data_retained & (1 << flow->flow_index) ) - // { - // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", - // gpu_task->ec->data[ flow->flow_index 
].data_in, gpu_task->ec->data[ flow->flow_index ].data_in->original, - // gpu_task->ec->data[ flow->flow_index ].data_in->readers, gpu_task->ec->data[ flow->flow_index ].data_in->super.super.obj_reference_count, - // __FILE__, __LINE__); -// - // PARSEC_OBJ_RELEASE(gpu_task->ec->data[ flow->flow_index ].data_in); - // } - //} PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\t\tNO Move %s for data copy %p [ref_count %d, key %x] of %d bytes (host v:%d / device v:%d)", @@ -1685,8 +1673,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); if( NULL != release_after_data_in_is_attached ) { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p on device_index %d [readers %d, ref_count %d] at %s:%d", + release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); @@ -1818,9 +1806,9 @@ parsec_cuda_data_advise(parsec_device_module_t *dev, parsec_data_t *data, int ad gpu_task->stage_in = parsec_default_cuda_stage_in; gpu_task->stage_out = parsec_default_cuda_stage_out; PARSEC_OBJ_RETAIN(data->device_copies[ data->owner_device ]); - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Retain data copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Retain data copy %p attached to original %p on device_index %d [readers %d, ref_count %d] at %s:%d", data->device_copies[ data->owner_device ], data->device_copies[ data->owner_device ]->original, - 
data->device_copies[ data->owner_device ]->readers, + data->device_copies[ data->owner_device ]->device_index, data->device_copies[ data->owner_device ]->readers, data->device_copies[ data->owner_device ]->super.super.obj_reference_count, __FILE__, __LINE__); gpu_task->ec->data[0].data_in = data->device_copies[ data->owner_device ]; @@ -2440,8 +2428,10 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, nb_elts = gpu_task->flow_nb_elts[i]; PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, - "GPU[%s]:\tTry kernel_pop for copy %p attached to original %p [readers %d, ref_count %d] available on flow %s", - gpu_device->super.name, gpu_copy, original, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, flow->name); + "GPU[%s]:\tTry kernel_pop for copy %p attached to original %p on device_index %d [readers %d, ref_count %d] available on flow %s ( %s )", + gpu_device->super.name, gpu_copy, original, gpu_copy->device_index, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, flow->name, + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, this_task) ); + assert( this_task->data[i].data_in == NULL || original == this_task->data[i].data_in->original ); if( !(flow->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 3ac237da7..7876912e9 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -85,8 +85,8 @@ struct parsec_gpu_task_s { parsec_stage_in_function_t *stage_in; parsec_stage_out_function_t *stage_out; int migrate_status; - int32_t data_retained; int32_t posssible_candidate[MAX_PARAM_COUNT]; + int32_t data_retained; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From ad1cc40cc35de1502d872653df26914737f11d41 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 13 Jun 2022 11:00:29 -0400 Subject: [PATCH 086/215] Additional condition added to change_task_features() In some cases, the CPU device_copy is NULL while the 
GPU device_copy still exists. This should not happen. To sidestep this problem we add an additional condition. The order of condition evaluation is important, or else it may result in a segfault. --- parsec/data.c | 4 ++-- parsec/mca/device/cuda/device_cuda_migrate.c | 4 ++-- parsec/mca/device/cuda/device_cuda_module.c | 9 --------- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/parsec/data.c b/parsec/data.c index 472846092..8ea700027 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -232,8 +232,8 @@ int parsec_data_copy_detach(parsec_data_t* data, } if( new_owner_copy != NULL ) PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "DEV[%d]: parsec_data_copy_detach identified new owner original %p device_copy %p", - device, data, copy); + "DEV[%d]: identified new owner for original %p : device_copy %p on device_index %d (old owner was copy %p on device_index %d)", + device, data, new_owner_copy, new_owner_copy->device_index, copy, copy->device_index); } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c05be0677..ae12d93c7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -492,7 +492,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); //assert( task->data[i].data_out->super.super.obj_reference_count == 1); - if( task->data[i].data_out->version > original->device_copies[0]->version) + if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); @@ -506,7 +506,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task,
parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); //assert( task->data[i].data_out->super.super.obj_reference_count == 1); - if( task->data[i].data_out->version > original->device_copies[0]->version) + if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index b9df483e0..6c58026b4 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1508,15 +1508,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* Do not need to be tranferred */ if( -1 == transfer_from ) { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - - /** - * When we are migrating of the data is already in the destination - * GPU we have to compensate for the reader increment made during - * the first stage_in - */ - //if(gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - // PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); - } else { /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; From 2f1ff56434327ce8f266c67e015acdee7e70804e Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 13 Jun 2022 11:17:25 -0400 Subject: [PATCH 087/215] Documentation updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 19 ++++--------------- parsec/mca/device/cuda/device_cuda_module.c | 13 ++----------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ae12d93c7..e94ea9f4a 100644 --- 
a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -483,7 +483,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - //#if 0 if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { @@ -491,7 +490,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - //assert( task->data[i].data_out->super.super.obj_reference_count == 1); + if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else @@ -505,7 +504,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - //assert( task->data[i].data_out->super.super.obj_reference_count == 1); + if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); else @@ -521,9 +520,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); assert( task->data[i].data_out->super.super.obj_reference_count == 1); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); - } - //#endif - + } PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device 
%d to %d (stage_in: %d)", @@ -555,12 +552,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[original->owner_device] != NULL); parsec_atomic_lock( &original->lock ); - //task->data[i].data_out = task->data[i].data_out; + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - //PARSEC_DATA_COPY_INC_READERS_ATOMIC( task->data[i].data_out ); - - //PARSEC_OBJ_RETAIN(task->data[i].data_out); - //gpu_task->data_retained |= 1 << i; gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); @@ -572,10 +565,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - //parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); - //PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); - parsec_atomic_unlock( &original->lock ); } } diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 6c58026b4..a578a5b0a 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -989,7 +989,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, temp_loc[j], temp_loc[j]->super.super.obj_reference_count); /* push them at the head to reach them again at the next iteration */ - //assert( temp_loc[j]->super.super.obj_reference_count == 1); assert(temp_loc[j]->device_index == gpu_device->super.device_index); parsec_list_push_front(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)temp_loc[j]); } @@ -1038,7 +1037,6 @@ parsec_gpu_data_reserve_device_space( 
parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); - //assert( lru_gpu_elem->super.super.obj_reference_count == 1); assert(lru_gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { @@ -1067,7 +1065,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, * might be adding/removing other elements to the list, so we * need to protect all accesses to gpu_mem_lru with the locked version */ assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); - //assert( lru_gpu_elem->super.super.obj_reference_count == 1); assert(lru_gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { @@ -1210,7 +1207,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, task_name, gpu_elem, gpu_elem->super.super.obj_reference_count); assert(0 != (gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); - //assert( gpu_elem->super.super.obj_reference_count == 1); assert(gpu_elem->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); parsec_atomic_unlock(&master->lock); @@ -1389,13 +1385,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); + //decrement the reader corresponding to the first stage_in 
PARSEC_DATA_COPY_DEC_READERS_ATOMIC(candidate); goto src_selected; } } - + // if the task is a migrated task and the possible candidate has already been identified if( (gpu_task->migrate_status > TASK_NOT_MIGRATED) && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) { @@ -2017,7 +2014,6 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, task->data[i].data_in->super.super.obj_reference_count); parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_in); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_in); - //assert( task->data[i].data_in->super.super.obj_reference_count == 1); assert(task->data[i].data_in->device_index == src_device->super.device_index); parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); src_device->data_avail_epoch++; @@ -2081,7 +2077,6 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream, "GPU[%s]:\tMake copy %p [ref_count %d] available after prefetch from gpu_task %p, ec %p", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, gtask, gtask->ec); - //assert( gpu_copy->super.super.obj_reference_count == 1); assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } @@ -2450,7 +2445,6 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); /* TODO: singleton instead? 
*/ - //assert( gpu_copy->super.super.obj_reference_count == 1); assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); update_data_epoch = 1; @@ -2628,14 +2622,12 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, gpu_copy, gpu_copy->super.super.obj_reference_count, __func__); parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); - //assert( gpu_copy->super.super.obj_reference_count == 1); assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } else { PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "CUDA copy %p [ref_count %d] moved to the owned LRU in %s", gpu_copy, gpu_copy->super.super.obj_reference_count, __func__); - //assert( gpu_copy->super.super.obj_reference_count == 1); parsec_list_push_back(&gpu_device->gpu_mem_owned_lru, (parsec_list_item_t*)gpu_copy); } } @@ -2702,7 +2694,6 @@ parsec_cuda_kernel_cleanout( parsec_device_gpu_module_t *gpu_device, */ this_task->data[i].data_out = cpu_copy; if( 0 != (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) { - //assert( gpu_copy->super.super.obj_reference_count == 1); assert(gpu_copy->device_index == gpu_device->super.device_index); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } From 683e89e3bc157fbba16c1ba2796f530f29a24c52 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 15 Jun 2022 16:21:44 -0400 Subject: [PATCH 088/215] mca parameter parsec_cuda_migrate_tasks used to enable migration. Migration is disabled by default. 
--- parsec/mca/device/cuda/device_cuda_component.c | 4 ++++ parsec/mca/device/cuda/device_cuda_module.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index ffa5de4f0..9d71635e0 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -40,6 +40,7 @@ int parsec_device_cuda_enabled_index, parsec_device_cuda_enabled; int parsec_cuda_sort_pending = 0; int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; char* parsec_cuda_lib_path = NULL; +int parsec_cuda_migrate_tasks = 0; static int cuda_mask, cuda_nvlink_mask; @@ -194,6 +195,9 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "sort_pending_tasks", "Boolean to let the GPU engine sort the first pending tasks stored in the list", false, false, 0, &parsec_cuda_sort_pending); + (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_tasks", + "Boolean to let the GPU engine migrate tasks", + false, false, 0, &parsec_cuda_migrate_tasks); #if defined(PARSEC_PROF_TRACE) (void)parsec_mca_param_reg_int_name("device_cuda", "one_profiling_stream_per_cuda_stream", "Boolean to separate the profiling of each cuda stream into a single profiling stream", diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index a578a5b0a..fd7245da6 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -48,6 +48,8 @@ parsec_cuda_memory_reserve( parsec_device_cuda_module_t* gpu_device, static int parsec_cuda_memory_release( parsec_device_cuda_module_t* gpu_device ); static int parsec_cuda_flush_lru( parsec_device_module_t *device ); +extern int parsec_cuda_migrate_tasks; + /* look up how many FMA per cycle in single/double, per cuda MP * precision. 
* The following table provides updated values for future archs @@ -2897,7 +2899,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * is deducted from the total number of tasks that will be executed by this * GPU. */ - nb_migrated = migrate_to_starving_device(es, gpu_device); + if(parsec_cuda_migrate_tasks) + nb_migrated = migrate_to_starving_device(es, gpu_device); if( nb_migrated > 0 ) goto crappy_code; From 773836cbd43f1ad0c34eb8f3c4861ad378c2c9a8 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 27 Apr 2022 15:49:40 -0400 Subject: [PATCH 089/215] Fix few issues with our use of spack. - Prevent spack from updating. Pull once and stay at that version until the github runner is updated. - Do not find external dependencies, even if this means that the first time the build will be more expensive. - remove the ${HOME}/.spack directory Signed-off-by: George Bosilca Don't find external dependencies. And remove the .spack directory. Signed-off-by: George Bosilca --- .github/CI/spack_setup.sh | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/CI/spack_setup.sh b/.github/CI/spack_setup.sh index 3a5ae00dc..8f1fb0191 100644 --- a/.github/CI/spack_setup.sh +++ b/.github/CI/spack_setup.sh @@ -28,14 +28,19 @@ save_dir=`pwd` # echo "::group::Spack environment setup" if [ -r ${SPACK_DIR}/.git/FETCH_HEAD ]; then + # We should never allow spack to update it's packages, it increases the + # opportunity for mishandling of the runner environment. Instead, when + # there is a need for update, the entire runner should be flushed, such + # that a fresh spack install is created.
last_update=`stat -c %Y ${SPACK_DIR}/.git/FETCH_HEAD` current=`date -d now "+%s"` - if [ $(((current - last_update) / 86400)) -gt 1 ]; then - echo "Last update ${last_update}, current ${current}: Do git pull spack" - cd $SPACK_DIR && git pull - else - echo "git pull was less than one day ago" - fi + echo "spack is $(((current - last_update) / 86400)) days old" + # if [ $(((current - last_update) / 86400)) -gt 1 ]; then + # echo "Last update ${last_update}, current ${current}: Do git pull spack" + # cd $SPACK_DIR && git pull + # else + # echo "git pull was less than one day ago" + # fi else echo "git clone spack" git clone https://github.com/spack/spack $SPACK_DIR || true @@ -44,14 +49,15 @@ else cd $SPACK_DIR && git pull && git status fi +rm -rf ${HOME}/.spack + echo "Load spack environment" source $SPACK_DIR/share/spack/setup-env.sh -spack external find spack compiler find # Start with a fresh env every time -spack env remove -y ${RUNNER_ENV} +spack env remove -y ${RUNNER_ENV} || true cd ${save_dir} mkdir ${RUNNER_ENV} || true From 9a39307f36ddd45cc8cdcb491b8af6527c5536ba Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Mon, 25 Apr 2022 16:18:30 -0400 Subject: [PATCH 090/215] flex: on some machines, flex cannot work if parsec/utils is not created in the build directory before invocation, so let's create it In addition, the pregen script would run git in the build dir (instead of the src dir) Signed-off-by: Aurelien Bouteiller --- cmake_modules/pregen_flex_bison.cmake | 2 ++ parsec/CMakeLists.txt | 2 ++ parsec/interfaces/ptg/ptg-compiler/CMakeLists.txt | 1 + 3 files changed, 5 insertions(+) diff --git a/cmake_modules/pregen_flex_bison.cmake b/cmake_modules/pregen_flex_bison.cmake index a8180903a..7907cff1a 100644 --- a/cmake_modules/pregen_flex_bison.cmake +++ b/cmake_modules/pregen_flex_bison.cmake @@ -49,6 +49,7 @@ if(archive) # Check for problems: stray files in build directory execute_process( COMMAND git ls-files --error-unmatch ${srcdir}/${source} +
WORKING_DIRECTORY ${srcdir} RESULT_VARIABLE ret OUTPUT_VARIABLE stdout ERROR_VARIABLE stderr @@ -61,6 +62,7 @@ if(archive) # Check for modifications execute_process( COMMAND git status --porcelain -- ${srcdir}/${source} + WORKING_DIRECTORY ${srcdir} RESULT_VARIABLE ret OUTPUT_VARIABLE stdout ERROR_VARIABLE stderr diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index d2e26eb42..3ee7ed084 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -42,10 +42,12 @@ set(BASE_SOURCES if(FLEX_FOUND AND BISON_FOUND) # generate in the build dir + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/utils") FLEX_TARGET(keyval_flex utils/keyval_lex.l utils/keyval_lex.l.c) FLEX_TARGET(show_help_flex utils/show_help_lex.l utils/show_help_lex.l.c) # Generate in the pregen dir, add to the non-automated rule + file(MAKE_DIRECTORY "${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/utils") FLEX_TARGET(pregen_keyval_flex utils/keyval_lex.l ${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/utils/keyval_lex.l.c COMPILE_FLAGS --noline) FLEX_TARGET(pregen_show_help_flex utils/show_help_lex.l ${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/utils/show_help_lex.l.c COMPILE_FLAGS --noline) add_custom_target(parsec_pregen_flex_utils SOURCES ${FLEX_pregen_keyval_flex_OUTPUTS} ${FLEX_pregen_show_help_flex_OUTPUTS}) diff --git a/parsec/interfaces/ptg/ptg-compiler/CMakeLists.txt b/parsec/interfaces/ptg/ptg-compiler/CMakeLists.txt index e693fe98c..d7785a4ec 100644 --- a/parsec/interfaces/ptg/ptg-compiler/CMakeLists.txt +++ b/parsec/interfaces/ptg/ptg-compiler/CMakeLists.txt @@ -9,6 +9,7 @@ IF(NOT CMAKE_CROSSCOMPILING) ADD_FLEX_BISON_DEPENDENCY(parsec_flex parsec_yacc) # Generate in the pregen dir, add to the non-automated rule to update the archive + file(MAKE_DIRECTORY "${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/interfaces/ptg/ptg-compiler") BISON_TARGET(pregen_parsec_yacc parsec.y ${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/interfaces/ptg/ptg-compiler/parsec.y.c COMPILE_FLAGS -l) FLEX_TARGET(pregen_parsec_flex parsec.l 
${PARSEC_PREGEN_FLEX_BISON_DIR}/parsec/interfaces/ptg/ptg-compiler/parsec.l.c COMPILE_FLAGS --noline) add_custom_target(parsec_pregen_ptg SOURCES ${BISON_pregen_parsec_yacc_OUTPUTS} ${FLEX_pregen_parsec_flex_OUTPUTS}) From a09019b2810b4b0129bb6facd5ae22ae2ebce5d8 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 7 Jun 2022 16:29:49 -0400 Subject: [PATCH 091/215] Attempt to backport the revamp of the communication engine (#380) * Port revamp patch * Move around code in parsec_remote_dep.c to minimize patch size * Apply diff pre-revamp -> current master for remote_dep_mpi.c to parsec_remote_dep.c * Move parsec_remote_dep.c back to remote_dep_mpi.c This is done for easier porting, we should move it back eventually. * Reshuffle some more code to condense the diff * Fix missing typedef in remote_dep.h * Pass PARSEC_DATATYPE_NULL instead of -1 to mem_register * Restore call to MPI_Pack_size * Pass data copy instead of device_private pointer to parsec_ce.reshape * Reshape takes the source and destination datatypes * Restore removed code in parsec_remote_dep_memcpy and remote_dep_get_datatypes * Remove task data initialization that breaks DTD * the stack-declared task makes sense for PTG, but not for DTD, where we use the task found in the hash table * parsec_ce.reshape returns false (0) on success... * Move MPI related stuff (overtake, MIN_MPI_TAG) into MPI backend * Cleanup: re-enable profiling, and make the code compile with DEBUG enabled * Install missing header files * Bring back the datatype into the pack/unpack Add support for pack_size. 
* Remove some use of MPI_Types outside of the MPI backend Signed-off-by: Joseph Schuchart Signed-off-by: George Bosilca Co-authored-by: Thomas Herault Co-authored-by: George Bosilca --- parsec/CMakeLists.txt | 5 + parsec/debug_marks.c | 10 +- parsec/interfaces/ptg/ptg-compiler/jdf2c.c | 3 +- parsec/parsec.c | 2 - parsec/parsec_comm_engine.c | 28 + parsec/parsec_comm_engine.h | 168 +++ parsec/parsec_internal.h | 1 - parsec/parsec_mpi_funnelled.c | 1254 +++++++++++++++++ parsec/parsec_mpi_funnelled.h | 83 ++ parsec/remote_dep.c | 25 +- parsec/remote_dep.h | 287 +++- parsec/remote_dep_mpi.c | 1480 ++++++++------------ tests/dsl/dtd/CMakeLists.txt | 18 + tests/dsl/dtd/dtd_test_ce.c | 683 +++++++++ 14 files changed, 3078 insertions(+), 969 deletions(-) create mode 100644 parsec/parsec_comm_engine.c create mode 100644 parsec/parsec_comm_engine.h create mode 100644 parsec/parsec_mpi_funnelled.c create mode 100644 parsec/parsec_mpi_funnelled.h create mode 100644 tests/dsl/dtd/dtd_test_ce.c diff --git a/parsec/CMakeLists.txt b/parsec/CMakeLists.txt index 3ee7ed084..6db3654e9 100644 --- a/parsec/CMakeLists.txt +++ b/parsec/CMakeLists.txt @@ -113,6 +113,9 @@ set(SOURCES mempool.c private_mempool.c remote_dep.c + parsec_comm_engine.c + parsec_mpi_funnelled.c + remote_dep_mpi.c scheduling.c compound.c vpmap.c @@ -296,6 +299,8 @@ if( BUILD_PARSEC ) ${CMAKE_CURRENT_SOURCE_DIR}/dictionary.h ${CMAKE_CURRENT_SOURCE_DIR}/data.h ${CMAKE_CURRENT_SOURCE_DIR}/private_mempool.h + ${CMAKE_CURRENT_SOURCE_DIR}/bindthread.h + ${CMAKE_CURRENT_SOURCE_DIR}/parsec_comm_engine.h ${CMAKE_CURRENT_SOURCE_DIR}/include/parsec/deprecated.h DESTINATION ${PARSEC_INSTALL_INCLUDEDIR}/parsec) diff --git a/parsec/debug_marks.c b/parsec/debug_marks.c index bb148a7ae..19c880f94 100644 --- a/parsec/debug_marks.c +++ b/parsec/debug_marks.c @@ -85,9 +85,8 @@ void debug_mark_ctl_msg_get_sent(int to, const void *b, const struct remote_dep_ parsec_debug_history_add("Mark: emission of a Get control message to %d\n" 
"\t Using buffer %p for emission\n" "\t deps requested = 0x%X\n" - "\t which requested = 0x%08x\n" - "\t tag for the reception of data = %d\n", - to, b, m->deps, (uint32_t)m->output_mask, m->tag); + "\t which requested = 0x%08x\n", + to, b, m->source_deps, (uint32_t)m->output_mask); } void debug_mark_ctl_msg_get_recv(int from, const void *b, const struct remote_dep_wire_get_s *m) @@ -95,9 +94,8 @@ void debug_mark_ctl_msg_get_recv(int from, const void *b, const struct remote_de parsec_debug_history_add("Mark: reception of a Get control message from %d\n" "\t Using buffer %p for reception\n" "\t deps requested = 0x%X\n" - "\t which requested = 0x%08x\n" - "\t tag for the reception of data = %d\n", - from, b, m->deps, (uint32_t)m->output_mask, m->tag); + "\t which requested = 0x%08x\n", + from, b, m->source_deps, (uint32_t)m->output_mask); } void debug_mark_dta_msg_start_send(int to, const void *b, int tag) diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index 77474b2f6..431dca4d0 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -1391,7 +1391,8 @@ static void jdf_generate_header_file(const jdf_t* jdf) "#define _%s_h_\n", jdf_basename, jdf_basename); houtput("#include \"parsec.h\"\n" - "#include \"parsec/parsec_internal.h\"\n\n" + "#include \"parsec/parsec_internal.h\"\n" + "#include \"parsec/remote_dep.h\"\n\n" ); houtput("BEGIN_C_DECLS\n\n"); diff --git a/parsec/parsec.c b/parsec/parsec.c index a50b5ea53..9577f51b4 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -866,8 +866,6 @@ parsec_context_t* parsec_init( int nb_cores, int* pargc, char** pargv[] ) __parsec_thread_init( &startup[0] ); - remote_dep_mpi_initialize_execution_stream(context); - /* Wait until all threads are done binding themselves */ parsec_barrier_wait( &(context->barrier) ); context->__parsec_internal_finalization_counter++; diff --git a/parsec/parsec_comm_engine.c 
b/parsec/parsec_comm_engine.c new file mode 100644 index 000000000..9ca8c3445 --- /dev/null +++ b/parsec/parsec_comm_engine.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2009-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + */ + +#include +#include "parsec/parsec_mpi_funnelled.h" + +parsec_comm_engine_t parsec_ce; + +/* This function will be called by the runtime */ +parsec_comm_engine_t * +parsec_comm_engine_init(parsec_context_t *parsec_context) +{ + /* call the selected module init */ + parsec_comm_engine_t *ce = mpi_funnelled_init(parsec_context); + + assert(ce->capabilites.sided > 0 && ce->capabilites.sided < 3); + return ce; +} + +int +parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine) +{ + /* call the selected module fini */ + return mpi_funnelled_fini(comm_engine); +} diff --git a/parsec/parsec_comm_engine.h b/parsec/parsec_comm_engine.h new file mode 100644 index 000000000..8ed9bb35d --- /dev/null +++ b/parsec/parsec_comm_engine.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2009-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ */ +#ifndef __USE_PARSEC_COMM_ENGINE_H__ +#define __USE_PARSEC_COMM_ENGINE_H__ + +#include +#include +#include "parsec/runtime.h" +#include "parsec/datatype.h" + +typedef enum { + PARSEC_MEM_TYPE_CONTIGUOUS = 0, + PARSEC_MEM_TYPE_NONCONTIGUOUS = 1 +} parsec_mem_type_t; + +typedef void* parsec_ce_mem_reg_handle_t; + +typedef struct parsec_comm_engine_capabilites_s parsec_comm_engine_capabilites_t; + +typedef struct parsec_comm_engine_s parsec_comm_engine_t; + +typedef int (*parsec_ce_callback_t)(void *cb_data); + +typedef uint64_t parsec_ce_tag_t; + +typedef int (*parsec_ce_am_callback_t)(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data); + +typedef int (*parsec_ce_tag_register_fn_t)(parsec_ce_tag_t tag, + parsec_ce_am_callback_t cb, + void *cb_data, + size_t msg_length/*bytes*/); + +typedef int (*parsec_ce_tag_unregister_fn_t)(parsec_ce_tag_t tag); + +/* PaRSEC will try to use non-contiguous type for lower layer capable of + * supporting it. + * For non-contiguous type the lower layer will expect layout and count and for + * contiguous only size will be provided. + * Please indicate the mem type using PARSEC_MEM_TYPE_CONTIGUOUS and + * PARSEC_MEM_TYPE_NONCONTIGUOUS. 
+ */ +typedef int (*parsec_ce_mem_register_fn_t)(void *mem, parsec_mem_type_t mem_type, + size_t count, parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size); + +typedef int (*parsec_ce_mem_unregister_fn_t)(parsec_ce_mem_reg_handle_t *lreg); + +typedef int (*parsec_ce_get_mem_reg_handle_size_fn_t)(void); + +typedef int (*parsec_ce_mem_retrieve_fn_t)(parsec_ce_mem_reg_handle_t lreg, void **mem, parsec_datatype_t *datatype, int *count); + +typedef int (*parsec_ce_onesided_callback_t)(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data); + +typedef int (*parsec_ce_put_fn_t)(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); + +typedef int (*parsec_ce_get_fn_t)(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); + +typedef int (*parsec_ce_send_active_message_fn_t)(parsec_comm_engine_t *comm_engine, + parsec_ce_tag_t tag, + int remote, + void *addr, size_t size); + +typedef int (*parsec_ce_progress_fn_t)(parsec_comm_engine_t *comm_engine); + +typedef int (*parsec_ce_enable_fn_t)(parsec_comm_engine_t *comm_engine); +typedef int (*parsec_ce_disable_fn_t)(parsec_comm_engine_t *comm_engine); + +typedef int (*parsec_ce_pack_fn_t)(parsec_comm_engine_t *ce, + void *inbuf, int incount, parsec_datatype_t type, + void *outbuf, int outsize, + int *positionA); + +typedef int 
(*parsec_ce_pack_size_fn_t)(parsec_comm_engine_t *ce, + int incount, parsec_datatype_t type, + int *size); + +typedef int (*parsec_ce_unpack_fn_t)(parsec_comm_engine_t *ce, + void *inbuf, int insize, int *position, + void *outbuf, int outcount, parsec_datatype_t type); + +typedef int (*parsec_ce_sync_fn_t)(parsec_comm_engine_t *comm_engine); +typedef int (*parsec_ce_can_serve_fn_t)(parsec_comm_engine_t *comm_engine); + +/** + * This function realize a data reshaping, by conceptually packing the dst + * into src. + * TODO: need to distinguish between src_layout and dst_layout + */ +typedef int (*parsec_ce_reshape_fn_t)(parsec_comm_engine_t* ce, + parsec_execution_stream_t* es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src); + +struct parsec_comm_engine_capabilites_s { + unsigned int sided : 2; /* Valid values are 1 and 2 */ + unsigned int supports_noncontiguous_datatype : 1; + unsigned int multithreaded : 1; +}; + +struct parsec_comm_engine_s { + parsec_context_t *parsec_context; + parsec_comm_engine_capabilites_t capabilites; + parsec_ce_tag_register_fn_t tag_register; + parsec_ce_tag_unregister_fn_t tag_unregister; + parsec_ce_mem_register_fn_t mem_register; + parsec_ce_mem_unregister_fn_t mem_unregister; + parsec_ce_get_mem_reg_handle_size_fn_t get_mem_handle_size; + parsec_ce_mem_retrieve_fn_t mem_retrieve; + parsec_ce_put_fn_t put; + parsec_ce_get_fn_t get; + parsec_ce_progress_fn_t progress; + parsec_ce_enable_fn_t enable; + parsec_ce_disable_fn_t disable; + parsec_ce_pack_fn_t pack; + parsec_ce_pack_size_fn_t pack_size; + parsec_ce_unpack_fn_t unpack; + parsec_ce_reshape_fn_t reshape; + parsec_ce_sync_fn_t sync; + parsec_ce_can_serve_fn_t can_serve; + parsec_ce_send_active_message_fn_t send_am; +}; + +/* global comm_engine */ +PARSEC_DECLSPEC extern parsec_comm_engine_t parsec_ce; + +parsec_comm_engine_t 
* parsec_comm_engine_init(parsec_context_t *parsec_context); +int parsec_comm_engine_fini(parsec_comm_engine_t *comm_engine); + +#endif /* __USE_PARSEC_COMM_ENGINE_H__ */ diff --git a/parsec/parsec_internal.h b/parsec/parsec_internal.h index 95c6eafd2..deb100e3b 100644 --- a/parsec/parsec_internal.h +++ b/parsec/parsec_internal.h @@ -19,7 +19,6 @@ #include "parsec/profiling.h" #include "parsec/mempool.h" #include "parsec/arena.h" -#include "parsec/remote_dep.h" #include "parsec/datarepo.h" #include "parsec/data.h" #include "parsec/utils/debug.h" diff --git a/parsec/parsec_mpi_funnelled.c b/parsec/parsec_mpi_funnelled.c new file mode 100644 index 000000000..f9af87b94 --- /dev/null +++ b/parsec/parsec_mpi_funnelled.c @@ -0,0 +1,1254 @@ +/* + * Copyright (c) 2009-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + */ + +#include +#include +#include +#include +#include "parsec/parsec_mpi_funnelled.h" +#include "parsec/remote_dep.h" +#include "parsec/class/parsec_hash_table.h" +#include "parsec/class/dequeue.h" +#include "parsec/class/list.h" +#include "parsec/execution_stream.h" +#include "parsec/utils/debug.h" +#include "parsec/utils/mca_param.h" + +/* Range between which tags are allowed to be registered. 
+ * For now we allow 10 tags to be registered + */ +#define MPI_FUNNELLED_MIN_TAG 2 +#define MPI_FUNNELLED_MAX_TAG (MPI_FUNNELLED_MIN_TAG + 10) + +/* Internal TAG for GET and PUT activation message, + * for two sides to agree on a "TAG" to post Irecv and Isend on + */ +#define MPI_FUNNELLED_GET_TAG_INTERNAL 0 +#define MPI_FUNNELLED_PUT_TAG_INTERNAL 1 + +static int +mpi_no_thread_push_posted_req(parsec_comm_engine_t *ce); + +// TODO put all the active ones(for debug) in a table and create a mempool +parsec_mempool_t *mpi_funnelled_mem_reg_handle_mempool = NULL; + +/* Memory handles, opaque to upper layers */ +typedef struct mpi_funnelled_mem_reg_handle_s { + parsec_list_item_t super; + parsec_thread_mempool_t *mempool_owner; + void *self; + void *mem; + parsec_datatype_t datatype; + int count; +} mpi_funnelled_mem_reg_handle_t; + +PARSEC_DECLSPEC PARSEC_OBJ_CLASS_DECLARATION(mpi_funnelled_mem_reg_handle_t); + +/* To create object of class mpi_funnelled_mem_reg_handle_t that inherits + * parsec_list_item_t class + */ +PARSEC_OBJ_CLASS_INSTANCE(mpi_funnelled_mem_reg_handle_t, parsec_list_item_t, + NULL, NULL); + +/* Pointers are converted to long to be used as keys to fetch data in the get + * rdv protocol. Make sure we can carry pointers correctly. + */ +#ifdef PARSEC_HAVE_LIMITS_H +#include +#endif +#if ULONG_MAX < UINTPTR_MAX +#error "unsigned long is not large enough to hold a pointer!" +#endif + +/* note: tags are necessary to order communication between pairs. They are used to + * correctly handle data transfers, as each data provider will provide a tag which + * combined with the source ensure message matching consistency. As MPI requires the + * max tag to be positive, initializing it to a negative value allows us to check + * if the layer has been initialized or not. 
+ */ +#define MIN_MPI_TAG (REMOTE_DEP_MAX_CTRL_TAG+1) +static int MAX_MPI_TAG = -1, mca_tag_ub = -1; +static volatile int __VAL_NEXT_TAG = MIN_MPI_TAG; +#if INT_MAX == INT32_MAX +#define next_tag_cas(t, o, n) parsec_atomic_cas_int32(t, o, n) +#elif INT_MAX == INT64_MAX +#define next_tag_cas(t, o, n) parsec_atomic_cas_int64(t, o, n) +#else +#error "next_tag_cas written to support sizeof(int) of 4 or 8" +#endif +static inline int next_tag(int k) { + int __tag, __tag_o, __next_tag; +reread: + __tag = __tag_o = __VAL_NEXT_TAG; + if( __tag > (MAX_MPI_TAG-k) ) { + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "rank %d tag rollover: min %d < %d (+%d) < max %d", parsec_debug_rank, + MIN_MPI_TAG, __tag, k, MAX_MPI_TAG); + __tag = MIN_MPI_TAG; + } + __next_tag = __tag+k; + + if( parsec_comm_es.virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT ) { + if(!next_tag_cas(&__VAL_NEXT_TAG, __tag_o, __next_tag)) { + goto reread; + } + } + else { + __VAL_NEXT_TAG = __next_tag; + } + return __tag; +} + +/* Range of index allowed for each type of request. + * For registered tags, each will get 5 spots in the array of requests. + * For dynamic tags, there will be a total of MAX_DYNAMIC_REQ_RANGE + * spots in the same array. + */ +#define MAX_DYNAMIC_REQ_RANGE 30 /* according to current implementation */ +#define EACH_STATIC_REQ_RANGE 5 /* for each registered tag */ + +/* Hash table for tag_structure. Each registered tags will be book-kept + * using this structure. + */ +static int tag_hash_table_size = 1<comm_ctx (which is a duplicate of + * whatever the user provides). 
+ */ +static MPI_Comm dep_comm = MPI_COMM_NULL; +/* The internal communicator for all intra-node communications */ +static MPI_Comm dep_self = MPI_COMM_NULL; + +static mpi_funnelled_callback_t *array_of_callbacks; +static MPI_Request *array_of_requests; +static int *array_of_indices; +static MPI_Status *array_of_statuses; + +static int size_of_total_reqs = 0; +static int mpi_funnelled_last_active_req = 0; +static int mpi_funnelled_static_req_idx = 0; + +static int nb_internal_tag = 0; +static int count_internal_tag = 0; + +#if defined(PARSEC_HAVE_MPI_OVERTAKE) +static int parsec_param_enable_mpi_overtake; +#endif + +/* List to hold pending requests */ +parsec_list_t mpi_funnelled_dynamic_req_fifo; /* ordered non threaded fifo */ +parsec_mempool_t *mpi_funnelled_dynamic_req_mempool = NULL; + +/* This structure is used to save all the information necessary to + * invoke a callback after a MPI_Request is satisfied + */ +typedef struct mpi_funnelled_dynamic_req_s { + parsec_list_item_t super; + parsec_thread_mempool_t *mempool_owner; + int post_isend; + MPI_Request request; + mpi_funnelled_callback_t cb; +} mpi_funnelled_dynamic_req_t; + +PARSEC_DECLSPEC PARSEC_OBJ_CLASS_DECLARATION(mpi_funnelled_dynamic_req_t); + +/* To create object of class mpi_funnelled_dynamic_req_t that inherits + * parsec_list_item_t class + */ +PARSEC_OBJ_CLASS_INSTANCE(mpi_funnelled_dynamic_req_t, parsec_list_item_t, + NULL, NULL); + + +/* Data we pass internally inside GET and PUT for handshake and other + * synchronizations. + */ +typedef struct get_am_data_s { + int tag; + parsec_ce_mem_reg_handle_t lreg; + parsec_ce_mem_reg_handle_t rreg; + uintptr_t cb_fn; + uintptr_t deps; +} get_am_data_t; + +typedef struct mpi_funnelled_handshake_info_s { + int tag; + parsec_ce_mem_reg_handle_t source_memory_handle; + parsec_ce_mem_reg_handle_t remote_memory_handle; + uintptr_t cb_fn; + +} mpi_funnelled_handshake_info_t; + +/* This is the callback that is triggered on the sender side for a + * GET. 
In this function we get the TAG on which the receiver has + * posted an Irecv and using which the sender should post an Isend + */ +static int +mpi_funnelled_internal_get_am_callback(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) ce; (void) tag; (void) msg_size; (void) cb_data; + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + + mpi_funnelled_callback_t *cb; + MPI_Request *request; + + mpi_funnelled_handshake_info_t *handshake_info = (mpi_funnelled_handshake_info_t *) msg; + + + /* This rank sent it's mem_reg in the activation msg, which is being + * sent back as rreg of the msg */ + mpi_funnelled_mem_reg_handle_t *remote_memory_handle = (mpi_funnelled_mem_reg_handle_t *) (handshake_info->remote_memory_handle); /* This is the memory handle of the remote(our) side */ + + assert(mpi_funnelled_last_active_req >= mpi_funnelled_static_req_idx); + + int post_in_static_array = mpi_funnelled_last_active_req < size_of_total_reqs; + mpi_funnelled_dynamic_req_t *item; + + if(post_in_static_array) { + request = &array_of_requests[mpi_funnelled_last_active_req]; + cb = &array_of_callbacks[mpi_funnelled_last_active_req]; + MPI_Isend(remote_memory_handle->mem, remote_memory_handle->count, remote_memory_handle->datatype, + src, handshake_info->tag, dep_comm, + request); + } else { + item = (mpi_funnelled_dynamic_req_t *)parsec_thread_mempool_allocate(mpi_funnelled_dynamic_req_mempool->thread_mempools); + item->post_isend = 1; + request = &item->request; + cb = &item->cb; + } + + /* we(the remote side) requested the source to forward us callback data that will be passed + * to the callback function to notify upper level that the data has reached. We are copying + * the callback data sent from the source. 
+ */ + void *callback_data = malloc(msg_size - sizeof(mpi_funnelled_handshake_info_t)); + memcpy( callback_data, + ((char*)msg) + sizeof(mpi_funnelled_handshake_info_t), + msg_size - sizeof(mpi_funnelled_handshake_info_t) ); + + cb->cb_type.onesided_mimic_am.fct = (parsec_ce_am_callback_t) handshake_info->cb_fn; + cb->cb_type.onesided_mimic_am.msg = callback_data; + cb->storage1 = mpi_funnelled_last_active_req; + cb->storage2 = src; + cb->cb_data = cb->cb_data; + cb->tag = NULL; + cb->type = MPI_FUNNELLED_TYPE_ONESIDED_MIMIC_AM; + + if(post_in_static_array) { + mpi_funnelled_last_active_req++; + } else { + parsec_list_nolock_push_back(&mpi_funnelled_dynamic_req_fifo, + (parsec_list_item_t *)item); + /*if(mpi_funnelled_last_active_req < size_of_total_reqs) { + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + mpi_no_thread_push_posted_req(ce); + }*/ + } + + return 1; +} + +/* This is the callback that is triggered on the receiver side for a + * PUT. This is where we know the TAG to post the Irecv on. 
+ */ +static int +mpi_funnelled_internal_put_am_callback(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) ce; (void) tag; (void)msg_size; (void)cb_data; + + mpi_funnelled_callback_t *cb; + MPI_Request *request; + + mpi_funnelled_handshake_info_t *handshake_info = (mpi_funnelled_handshake_info_t *) msg; + + mpi_funnelled_mem_reg_handle_t *remote_memory_handle = (mpi_funnelled_mem_reg_handle_t *) (handshake_info->remote_memory_handle); /* This is the memory handle of the remote(our) side */ + + assert(handshake_info->tag >= MIN_MPI_TAG); + assert(mpi_funnelled_last_active_req >= mpi_funnelled_static_req_idx); + + int _size; + MPI_Type_size(remote_memory_handle->datatype, &_size); + + int post_in_static_array = 1; + mpi_funnelled_dynamic_req_t *item; + if(!(mpi_funnelled_last_active_req < size_of_total_reqs)) { + post_in_static_array = 0; + } + + if(post_in_static_array) { + request = &array_of_requests[mpi_funnelled_last_active_req]; + cb = &array_of_callbacks[mpi_funnelled_last_active_req]; + } else { + /* we are not delaying posting the Irecv as the other side will post the Isend as soon + * as it get an acknowledgement of the completion of the active message it sent for handshake. + * This ensures we are not generating MPI unexpected and all the sends and receives are in order. + */ + item = (mpi_funnelled_dynamic_req_t *)parsec_thread_mempool_allocate(mpi_funnelled_dynamic_req_mempool->thread_mempools); + item->post_isend = 0; + request = &item->request; + cb = &item->cb; + } + + MPI_Irecv(remote_memory_handle->mem, remote_memory_handle->count, remote_memory_handle->datatype, + src, handshake_info->tag, dep_comm, request); + + /* we(the remote side) requested the source to forward us callback data that will be passed + * to the callback function to notify upper level that the data has reached. We are copying + * the callback data sent from the source. 
+ */ + void *callback_data = malloc(msg_size - sizeof(mpi_funnelled_handshake_info_t)); + memcpy( callback_data, + ((char*)msg) + sizeof(mpi_funnelled_handshake_info_t), + msg_size - sizeof(mpi_funnelled_handshake_info_t) ); + + /* We sent the pointer to the call back function for PUT over notification. + * For a TRUE one sided this would be accomplished by an active message at + * the tag of the integer value of the function pointer we trigger as callback. + */ + cb->cb_type.onesided_mimic_am.fct = (parsec_ce_am_callback_t) handshake_info->cb_fn; + cb->cb_type.onesided_mimic_am.msg = callback_data; + cb->storage1 = mpi_funnelled_last_active_req; + cb->storage2 = src; + cb->cb_data = cb->cb_data; + cb->tag = NULL; + cb->type = MPI_FUNNELLED_TYPE_ONESIDED_MIMIC_AM; + + if(post_in_static_array) { + mpi_funnelled_last_active_req++; + } else { + parsec_list_nolock_push_back(&mpi_funnelled_dynamic_req_fifo, + (parsec_list_item_t *)item); + /*if(mpi_funnelled_last_active_req < size_of_total_reqs) { + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + mpi_no_thread_push_posted_req(ce); + }*/ + } + + return 1; +} + +int parsec_mpi_sendrecv(parsec_comm_engine_t *ce, + parsec_execution_stream_t* es, + parsec_data_copy_t *dst, + int64_t displ_dst, + parsec_datatype_t layout_dst, + uint64_t count_dst, + parsec_data_copy_t *src, + int64_t displ_src, + parsec_datatype_t layout_src, + uint64_t count_src) +{ + int rc; + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, + "COPY LOCAL DATA from %p (%d elements of dtt %p) to %p (%d elements of dtt %p)", + PARSEC_DATA_COPY_GET_PTR(src) + displ_src, count_src, layout_src, + PARSEC_DATA_COPY_GET_PTR(dst) + displ_dst, count_dst, layout_dst); + rc = MPI_Sendrecv((char*)PARSEC_DATA_COPY_GET_PTR(src) + displ_src, + count_src, layout_src, 0, es->th_id, + (char*)PARSEC_DATA_COPY_GET_PTR(dst) + displ_dst, + count_dst, layout_dst, 0, es->th_id, + dep_self, MPI_STATUS_IGNORE); + (void)ce; + return (MPI_SUCCESS == rc ? 
0 : -1); +} + +/** + * The following function take care of all the steps necessary to initialize the + * invariable part of the communication engine such as the const dependencies + * to MPI (max tag and other global info), or local objects. + */ +static int mpi_funneled_init_once(parsec_context_t* context) +{ + int mpi_tag_ub_exists, *ub; + + assert(-1 == MAX_MPI_TAG); + + assert(MPI_COMM_NULL == dep_self); + MPI_Comm_dup(MPI_COMM_SELF, &dep_self); + assert(MPI_COMM_NULL == dep_comm); + + /* + * Based on MPI 1.1 the MPI_TAG_UB should only be defined + * on MPI_COMM_WORLD. + */ +#if defined(PARSEC_HAVE_MPI_20) + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &ub, &mpi_tag_ub_exists); +#else + MPI_Attr_get(MPI_COMM_WORLD, MPI_TAG_UB, &ub, &mpi_tag_ub_exists); +#endif /* defined(PARSEC_HAVE_MPI_20) */ + + parsec_mca_param_reg_int_name("mpi", "tag_ub", + "The upper bound of the TAG used by the MPI communication engine. Bounded by the MPI_TAG_UB attribute on the MPI implementation MPI_COMM_WORLD. (-1 for MPI default)", + false, false, -1, &mca_tag_ub); + + if( !mpi_tag_ub_exists ) { + MAX_MPI_TAG = (-1 == mca_tag_ub) ? INT_MAX : mca_tag_ub; + parsec_warning("Your MPI implementation does not define MPI_TAG_UB and thus violates the standard (MPI-2.2, page 29, line 30). The max tag is therefore set using the MCA mpi_tag_ub (current value %d).\n", MAX_MPI_TAG); + } else { + MAX_MPI_TAG = ((-1 == mca_tag_ub) || (mca_tag_ub > *ub)) ? 
*ub : mca_tag_ub; + } + if( MAX_MPI_TAG < INT_MAX ) { + parsec_debug_verbose(3, parsec_comm_output_stream, + "MPI:\tYour MPI implementation defines the maximal TAG value to %d (0x%08x)," + " which might be too small should you have more than %d pending remote dependencies", + MAX_MPI_TAG, (unsigned int)MAX_MPI_TAG, MAX_MPI_TAG / MAX_DEP_OUT_COUNT); + } + + (void)context; + return 0; +} + +parsec_comm_engine_t * +mpi_funnelled_init(parsec_context_t *context) +{ + int i, rc; + + if( -1 == MAX_MPI_TAG ) + if( 0 != (rc = mpi_funneled_init_once(context)) ) { + parsec_debug_verbose(3, parsec_comm_output_stream, "MPI: Failed to correctly retrieve the max TAG." + " PaRSEC cannot continue using MPI\n"); + return NULL; + } + + /* Did anything changed that would require a build of the management structures? */ + assert(-1 != context->comm_ctx); + if(dep_comm == (MPI_Comm)context->comm_ctx) { + return &parsec_ce; + } + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "rank %d ENABLE MPI communication engine", + parsec_debug_rank); + + dep_comm = (MPI_Comm) context->comm_ctx; + +#if defined(PARSEC_HAVE_MPI_OVERTAKE) + if( parsec_param_enable_mpi_overtake ) { + MPI_Info no_order; + MPI_Info_create(&no_order); + MPI_Info_set(no_order, "mpi_assert_allow_overtaking", "true"); + MPI_Comm_set_info(dep_comm, no_order); + MPI_Info_free(&no_order); + } +#endif + + MPI_Comm_size(dep_comm, &(context->nb_nodes)); + MPI_Comm_rank(dep_comm, &(context->my_rank)); + + /* init hash table for registered tags */ + tag_hash_table = PARSEC_OBJ_NEW(parsec_hash_table_t); + for(i = 1; i < 16 && (1 << i) < tag_hash_table_size; i++) /* do nothing */; + parsec_hash_table_init(tag_hash_table, + offsetof(mpi_funnelled_tag_t, ht_item), + i, + tag_key_fns, + tag_hash_table); + + /* Initialize the arrays */ + array_of_callbacks = (mpi_funnelled_callback_t *) calloc(MAX_DYNAMIC_REQ_RANGE, + sizeof(mpi_funnelled_callback_t)); + array_of_requests = (MPI_Request *) calloc(MAX_DYNAMIC_REQ_RANGE, + 
sizeof(MPI_Request)); + array_of_indices = (int *) calloc(MAX_DYNAMIC_REQ_RANGE, sizeof(int)); + array_of_statuses = (MPI_Status *) calloc(MAX_DYNAMIC_REQ_RANGE, + sizeof(MPI_Status)); + + for(i = 0; i < MAX_DYNAMIC_REQ_RANGE; i++) { + array_of_requests[i] = MPI_REQUEST_NULL; + } + + size_of_total_reqs += MAX_DYNAMIC_REQ_RANGE; + + nb_internal_tag = 2; + + /* Make all the fn pointers point to this component's function */ + parsec_ce.tag_register = mpi_no_thread_tag_register; + parsec_ce.tag_unregister = mpi_no_thread_tag_unregister; + parsec_ce.mem_register = mpi_no_thread_mem_register; + parsec_ce.mem_unregister = mpi_no_thread_mem_unregister; + parsec_ce.get_mem_handle_size = mpi_no_thread_get_mem_reg_handle_size; + parsec_ce.mem_retrieve = mpi_no_thread_mem_retrieve; + parsec_ce.put = mpi_no_thread_put; + parsec_ce.get = mpi_no_thread_get; + parsec_ce.progress = mpi_no_thread_progress; + parsec_ce.enable = mpi_no_thread_enable; + parsec_ce.disable = mpi_no_thread_disable; + parsec_ce.pack = mpi_no_thread_pack; + parsec_ce.pack_size = mpi_no_thread_pack_size; + parsec_ce.unpack = mpi_no_thread_unpack; + parsec_ce.sync = mpi_no_thread_sync; + parsec_ce.reshape = parsec_mpi_sendrecv; + parsec_ce.can_serve = mpi_no_thread_can_push_more; + parsec_ce.send_am = mpi_no_thread_send_active_message; + + parsec_ce.parsec_context = context; + parsec_ce.capabilites.sided = 2; + parsec_ce.capabilites.supports_noncontiguous_datatype = 1; + + /* Register for internal GET and PUT AMs */ + parsec_ce.tag_register(MPI_FUNNELLED_GET_TAG_INTERNAL, + mpi_funnelled_internal_get_am_callback, + context, + 4096); + count_internal_tag++; + + parsec_ce.tag_register(MPI_FUNNELLED_PUT_TAG_INTERNAL, + mpi_funnelled_internal_put_am_callback, + context, + 4096); + count_internal_tag++; + + PARSEC_OBJ_CONSTRUCT(&mpi_funnelled_dynamic_req_fifo, parsec_list_t); + + mpi_funnelled_mem_reg_handle_mempool = (parsec_mempool_t*) malloc (sizeof(parsec_mempool_t)); + 
parsec_mempool_construct(mpi_funnelled_mem_reg_handle_mempool, + PARSEC_OBJ_CLASS(mpi_funnelled_mem_reg_handle_t), sizeof(mpi_funnelled_mem_reg_handle_t), + offsetof(mpi_funnelled_mem_reg_handle_t, mempool_owner), + 1); + + mpi_funnelled_dynamic_req_mempool = (parsec_mempool_t*) malloc (sizeof(parsec_mempool_t)); + parsec_mempool_construct(mpi_funnelled_dynamic_req_mempool, + PARSEC_OBJ_CLASS(mpi_funnelled_dynamic_req_t), sizeof(mpi_funnelled_dynamic_req_t), + offsetof(mpi_funnelled_dynamic_req_t, mempool_owner), + 1); + + return &parsec_ce; +} + +/** + * The communication engine is now completely disabled. All internal resources + * are released, and no future communications are possible. + * Anything initialized in init_once must be disposed off here + */ +int +mpi_funnelled_fini(parsec_comm_engine_t *ce) +{ + assert( -1 != MAX_MPI_TAG ); + + /* TODO: GO through all registered tags and unregister them */ + ce->tag_unregister(MPI_FUNNELLED_GET_TAG_INTERNAL); + ce->tag_unregister(MPI_FUNNELLED_PUT_TAG_INTERNAL); + + free(array_of_callbacks); array_of_callbacks = NULL; + free(array_of_requests); array_of_requests = NULL; + free(array_of_indices); array_of_indices = NULL; + free(array_of_statuses); array_of_statuses = NULL; + + parsec_hash_table_fini(tag_hash_table); + PARSEC_OBJ_RELEASE(tag_hash_table); + + PARSEC_OBJ_DESTRUCT(&mpi_funnelled_dynamic_req_fifo); + + parsec_mempool_destruct(mpi_funnelled_mem_reg_handle_mempool); + free(mpi_funnelled_mem_reg_handle_mempool); + + parsec_mempool_destruct(mpi_funnelled_dynamic_req_mempool); + free(mpi_funnelled_dynamic_req_mempool); + + /* Remove the static handles */ + MPI_Comm_free(&dep_self); /* dep_self becomes MPI_COMM_NULL */ + + /* Release the context communicators if any */ + if( -1 != ce->parsec_context->comm_ctx) { + MPI_Comm_free((MPI_Comm*)&ce->parsec_context->comm_ctx); + ce->parsec_context->comm_ctx = -1; /* We use -1 for the opaque comm_ctx, rather than the MPI specific MPI_COMM_NULL */ + } + + MAX_MPI_TAG = 
-1; /* mark the layer as uninitialized */ + + return 1; +} + +/* Users need to register all tags before finalizing the comm + * engine init. + * The requested tags should be from 0 up to MPI_FUNNELLED_MAX_TAG, + * dynamic tags will start from MPI_FUNNELLED_MAX_TAG. + */ +int +mpi_no_thread_tag_register(parsec_ce_tag_t tag, + parsec_ce_am_callback_t callback, + void *cb_data, + size_t msg_length) +{ + mpi_funnelled_callback_t *cb; + + /* All internal tags has been registered */ + if(nb_internal_tag == count_internal_tag) { + if(tag < MPI_FUNNELLED_MIN_TAG || tag >= MPI_FUNNELLED_MAX_TAG) { + parsec_warning("Tag is out of range, it has to be between %d - %d\n", MPI_FUNNELLED_MIN_TAG, MPI_FUNNELLED_MAX_TAG); + return PARSEC_ERR_VALUE_OUT_OF_BOUNDS; + } + assert( (tag >= MPI_FUNNELLED_MIN_TAG) && (tag < MPI_FUNNELLED_MAX_TAG) ); + } + + if(NULL != parsec_hash_table_nolock_find(tag_hash_table, (parsec_key_t)tag)) { + parsec_warning("Tag: %d is already registered\n", (int)tag); + return PARSEC_ERR_EXISTS; + } + + size_of_total_reqs += EACH_STATIC_REQ_RANGE; + + array_of_indices = realloc(array_of_indices, size_of_total_reqs * sizeof(int)); + array_of_statuses = realloc(array_of_statuses, size_of_total_reqs * sizeof(MPI_Status)); + + /* Packing persistent tags in the beginning of the array */ + /* Allocate a new array that is "EACH_STATIC_REQ_RANGE" size bigger + * than the previous allocation. + */ + mpi_funnelled_callback_t *tmp_array_cb = malloc(sizeof(mpi_funnelled_callback_t) * size_of_total_reqs); + /* Copy any previous persistent message info in the beginning */ + memcpy(tmp_array_cb, array_of_callbacks, sizeof(mpi_funnelled_callback_t) * mpi_funnelled_static_req_idx); + /* Leaving "EACH_STATIC_REQ_RANGE" number elements in the middle and copying + * the rest for the dynamic tag messages. 
+ */ + memcpy(tmp_array_cb + mpi_funnelled_static_req_idx + EACH_STATIC_REQ_RANGE, array_of_callbacks + mpi_funnelled_static_req_idx, sizeof(mpi_funnelled_callback_t) * MAX_DYNAMIC_REQ_RANGE); + free(array_of_callbacks); + array_of_callbacks = tmp_array_cb; + + /* Same procedure followed as array_of_callbacks. */ + MPI_Request *tmp_array_req = malloc(sizeof(MPI_Request) * size_of_total_reqs); + memcpy(tmp_array_req, array_of_requests, sizeof(MPI_Request) * mpi_funnelled_static_req_idx); + memcpy(tmp_array_req + mpi_funnelled_static_req_idx + EACH_STATIC_REQ_RANGE, array_of_requests + mpi_funnelled_static_req_idx, sizeof(MPI_Request) * MAX_DYNAMIC_REQ_RANGE); + free(array_of_requests); + array_of_requests = tmp_array_req; + + char **buf = (char **) calloc(EACH_STATIC_REQ_RANGE, sizeof(char *)); + buf[0] = (char*)calloc(EACH_STATIC_REQ_RANGE, msg_length * sizeof(char)); + + mpi_funnelled_tag_t *tag_struct = malloc(sizeof(mpi_funnelled_tag_t)); + tag_struct->tag = tag; + tag_struct->buf = buf; + tag_struct->start_idx = mpi_funnelled_static_req_idx; + tag_struct->msg_length = msg_length; + + for(int i = 0; i < EACH_STATIC_REQ_RANGE; i++) { + buf[i] = buf[0] + i * msg_length * sizeof(char); + + /* Even though the address of array_of_requests changes after every + * new registration of tags, the initialization of the requests will + * still work as the memory is copied after initialization. 
+ */ + MPI_Recv_init(buf[i], msg_length, MPI_BYTE, + MPI_ANY_SOURCE, tag, dep_comm, + &array_of_requests[mpi_funnelled_static_req_idx]); + + cb = &array_of_callbacks[mpi_funnelled_static_req_idx]; + cb->cb_type.am.fct = callback; + cb->storage1 = mpi_funnelled_static_req_idx; + cb->storage2 = i; + cb->cb_data = cb_data; + cb->tag = tag_struct; + cb->type = MPI_FUNNELLED_TYPE_AM; + MPI_Start(&array_of_requests[mpi_funnelled_static_req_idx]); + mpi_funnelled_static_req_idx++; + } + + /* insert in ht for bookkeeping */ + tag_struct->ht_item.key = (parsec_key_t)tag; + parsec_hash_table_nolock_insert(tag_hash_table, &tag_struct->ht_item ); + + assert((mpi_funnelled_static_req_idx + MAX_DYNAMIC_REQ_RANGE) == size_of_total_reqs); + + mpi_funnelled_last_active_req += EACH_STATIC_REQ_RANGE; + + return PARSEC_SUCCESS; +} + +int +mpi_no_thread_tag_unregister(parsec_ce_tag_t tag) +{ + mpi_funnelled_tag_t *tag_struct = parsec_hash_table_nolock_find(tag_hash_table, (parsec_key_t)tag); + if(NULL == tag_struct) { + parsec_inform("Tag %ld is not registered\n", (int)tag); + return 0; + } + + /* remove this tag from the arrays */ + /* WARNING: Assumed after this no wait or test will be called on + * array_of_requests + */ + int i, flag; + MPI_Status status; + + for(i = tag_struct->start_idx; i < tag_struct->start_idx + EACH_STATIC_REQ_RANGE; i++) { + MPI_Cancel(&array_of_requests[i]); + MPI_Test(&array_of_requests[i], &flag, &status); + MPI_Request_free(&array_of_requests[i]); + assert( MPI_REQUEST_NULL == array_of_requests[i] ); + } + + parsec_hash_table_remove(tag_hash_table, (parsec_key_t)tag); + + free(tag_struct->buf[0]); + free(tag_struct->buf); + + free(tag_struct); + + return 1; +} + +int +mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, + size_t count, parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size) +{ + /* For now we only expect non-contiguous data or a layout and count */ + assert(mem_type == 
PARSEC_MEM_TYPE_NONCONTIGUOUS); + (void) mem_type; (void) mem_size; + + /* This is mpi two_sided, the type can be of noncontiguous */ + *lreg = (void *)parsec_thread_mempool_allocate(mpi_funnelled_mem_reg_handle_mempool->thread_mempools); + + mpi_funnelled_mem_reg_handle_t *handle = (mpi_funnelled_mem_reg_handle_t *) *lreg; + *lreg_size = sizeof(mpi_funnelled_mem_reg_handle_t); + + handle->self = handle; + handle->mem = mem; + handle->datatype = datatype; + handle->count = count; + + // Push in a table + + return 1; +} + +int +mpi_no_thread_mem_unregister(parsec_ce_mem_reg_handle_t *lreg) +{ + //remove from table + + mpi_funnelled_mem_reg_handle_t *handle = (mpi_funnelled_mem_reg_handle_t *) *lreg; + parsec_thread_mempool_free(mpi_funnelled_mem_reg_handle_mempool->thread_mempools, handle->self); + return 1; +} + +/* Returns the size of the memory handle that is opaque to the upper level */ +int mpi_no_thread_get_mem_reg_handle_size(void) +{ + return sizeof(mpi_funnelled_mem_reg_handle_t); +} + +/* Return the address of memory and the size that was registered + * with a mem_reg_handle + */ +int +mpi_no_thread_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, + void **mem, parsec_datatype_t *datatype, int *count) +{ + mpi_funnelled_mem_reg_handle_t *handle = (mpi_funnelled_mem_reg_handle_t *) lreg; + *mem = handle->mem; + *datatype = handle->datatype; + *count = handle->count; + + return 1; +} + +int +mpi_no_thread_put(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size) +{ + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + + (void)r_cb_data; (void) size; + + mpi_funnelled_callback_t *cb; + MPI_Request *request; + + int tag = next_tag(1); + assert(tag >= MIN_MPI_TAG); + + mpi_funnelled_mem_reg_handle_t *source_memory_handle = 
(mpi_funnelled_mem_reg_handle_t *) lreg; + mpi_funnelled_mem_reg_handle_t *remote_memory_handle = (mpi_funnelled_mem_reg_handle_t *) rreg; + + + mpi_funnelled_handshake_info_t handshake_info; + + handshake_info.tag = tag; + handshake_info.source_memory_handle = source_memory_handle; + handshake_info.remote_memory_handle = remote_memory_handle->self; /* pass the actual pointer + instead of copying the whole + memory_handle */ + handshake_info.cb_fn = (uintptr_t) r_tag; + + /* We pack the static message(handshake_info) and the callback data + * the other side have sent us, to be forwarded. + */ + int buf_size = sizeof(mpi_funnelled_handshake_info_t) + r_cb_data_size; + void *buf = malloc(buf_size); + memcpy( buf, + &handshake_info, + sizeof(mpi_funnelled_handshake_info_t) ); + memcpy( ((char *)buf) + sizeof(mpi_funnelled_handshake_info_t), + r_cb_data, + r_cb_data_size ); + + /* Send AM to src to post Isend on this tag */ + /* this is blocking, so using data on stack is not a problem */ + ce->send_am(ce, MPI_FUNNELLED_PUT_TAG_INTERNAL, remote, buf, buf_size); + + free(buf); + + assert(mpi_funnelled_last_active_req >= mpi_funnelled_static_req_idx); + /* Now we can post the Isend on the lreg */ + /*MPI_Isend((char *)ldata->mem + ldispl, ldata->size, MPI_BYTE, remote, tag, comm, + &array_of_requests[mpi_funnelled_last_active_req]);*/ + + int post_in_static_array = 1; + mpi_funnelled_dynamic_req_t *item; + if(!(mpi_funnelled_last_active_req < size_of_total_reqs)) { + post_in_static_array = 0; + } + + if(post_in_static_array) { + request = &array_of_requests[mpi_funnelled_last_active_req]; + cb = &array_of_callbacks[mpi_funnelled_last_active_req]; + MPI_Isend((char *)source_memory_handle->mem + ldispl, source_memory_handle->count, + source_memory_handle->datatype, remote, tag, dep_comm, + request); + } else { + item = (mpi_funnelled_dynamic_req_t *)parsec_thread_mempool_allocate(mpi_funnelled_dynamic_req_mempool->thread_mempools); + item->post_isend = 1; + request = 
&item->request; + cb = &item->cb; + } + + cb->cb_type.onesided.fct = l_cb; + cb->storage1 = mpi_funnelled_last_active_req; + cb->storage2 = remote; + cb->cb_data = l_cb_data; + cb->cb_type.onesided.lreg = source_memory_handle->self; + cb->cb_type.onesided.ldispl = ldispl; + cb->cb_type.onesided.rreg = remote_memory_handle; + cb->cb_type.onesided.rdispl = rdispl; + cb->cb_type.onesided.size = tag; /* This should be taken care of */ + cb->cb_type.onesided.remote = remote; + cb->tag = NULL; + cb->type = MPI_FUNNELLED_TYPE_ONESIDED; + + if(post_in_static_array) { + mpi_funnelled_last_active_req++; + } else { + parsec_list_nolock_push_back(&mpi_funnelled_dynamic_req_fifo, + (parsec_list_item_t *)item); + /*if(mpi_funnelled_last_active_req < size_of_total_reqs) { + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + mpi_no_thread_push_posted_req(ce); + } */ + } + + return 1; +} + +int +mpi_no_thread_get(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size) +{ + (void)r_tag; (void)r_cb_data; + + mpi_funnelled_callback_t *cb; + MPI_Request *request; + + int tag = next_tag(1); + + mpi_funnelled_mem_reg_handle_t *source_memory_handle = (mpi_funnelled_mem_reg_handle_t *) lreg; + mpi_funnelled_mem_reg_handle_t *remote_memory_handle = (mpi_funnelled_mem_reg_handle_t *) rreg; + + + mpi_funnelled_handshake_info_t handshake_info; + + handshake_info.tag = tag; + handshake_info.source_memory_handle = source_memory_handle; + handshake_info.remote_memory_handle = remote_memory_handle->self; /* we store the actual pointer, as we + do not pass the while handle */ + handshake_info.cb_fn = r_tag; /* This is what the other side has passed to us to invoke when the GET is done */ + + /* Packing the callback data the other side has sent us and sending it 
back to them */ + int buf_size = sizeof(mpi_funnelled_handshake_info_t) + r_cb_data_size; + + void *buf = malloc(buf_size); + memcpy( buf, + &handshake_info, + sizeof(mpi_funnelled_handshake_info_t) ); + memcpy( ((char *)buf) + sizeof(mpi_funnelled_handshake_info_t), + r_cb_data, + r_cb_data_size ); + + + /* Send AM to src to post Isend on this tag */ + /* this is blocking, so using data on stack is not a problem */ + ce->send_am(ce, MPI_FUNNELLED_GET_TAG_INTERNAL, remote, buf, buf_size); + + free(buf); + + + assert(mpi_funnelled_last_active_req >= mpi_funnelled_static_req_idx); + + int post_in_static_array = 1; + mpi_funnelled_dynamic_req_t *item; + if(!(mpi_funnelled_last_active_req < size_of_total_reqs)) { + post_in_static_array = 0; + } + + if(post_in_static_array) { + request = &array_of_requests[mpi_funnelled_last_active_req]; + cb = &array_of_callbacks[mpi_funnelled_last_active_req]; + } else { + item = (mpi_funnelled_dynamic_req_t *)parsec_thread_mempool_allocate(mpi_funnelled_dynamic_req_mempool->thread_mempools); + item->post_isend = 0; + request = &item->request; + cb = &item->cb; + } + + MPI_Irecv((char*)source_memory_handle->mem + ldispl, source_memory_handle->count, source_memory_handle->datatype, + remote, tag, dep_comm, + request); + + cb->cb_type.onesided.fct = l_cb; + cb->storage1 = mpi_funnelled_last_active_req; + cb->storage2 = remote; + cb->cb_data = l_cb_data; + cb->cb_type.onesided.lreg = source_memory_handle; + cb->cb_type.onesided.ldispl = ldispl; + cb->cb_type.onesided.rreg = remote_memory_handle; + cb->cb_type.onesided.rdispl = rdispl; + cb->cb_type.onesided.size = size; + cb->cb_type.onesided.remote = remote; + cb->tag = NULL; + cb->type = MPI_FUNNELLED_TYPE_ONESIDED; + + if(post_in_static_array) { + mpi_funnelled_last_active_req++; + } else { + parsec_list_nolock_push_back(&mpi_funnelled_dynamic_req_fifo, + (parsec_list_item_t *)item); + /*if(mpi_funnelled_last_active_req < size_of_total_reqs) { + assert(mpi_funnelled_last_active_req < 
size_of_total_reqs); + mpi_no_thread_push_posted_req(ce); + }*/ + } + + return 1; +} + +int +mpi_no_thread_send_active_message(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + int remote, + void *addr, size_t size) +{ + (void) ce; + parsec_key_t key = 0 | tag ; + mpi_funnelled_tag_t *tag_struct = parsec_hash_table_nolock_find(tag_hash_table, key); + assert(tag_struct->msg_length >= size); + (void) tag_struct; + + MPI_Send(addr, size, MPI_BYTE, remote, tag, dep_comm); + + return 1; +} + +/* Common function to serve callbacks of completed request */ +int +mpi_no_thread_serve_cb(parsec_comm_engine_t *ce, mpi_funnelled_callback_t *cb, + int mpi_tag, int mpi_source, int length, void *buf, + int reset) +{ + int ret = 0; + if(cb->type == MPI_FUNNELLED_TYPE_AM) { + if(cb->cb_type.am.fct != NULL) { + ret = cb->cb_type.am.fct(ce, mpi_tag, buf, length, + mpi_source, cb->cb_data); + } + /* this is a persistent request, let's reset it if reset variable is ON */ + if(reset) { + /* Let's re-enable the pending request in the same position */ + MPI_Start(&array_of_requests[cb->storage1]); + } + } else if(cb->type == MPI_FUNNELLED_TYPE_ONESIDED) { + if(cb->cb_type.onesided.fct != NULL) { + ret = cb->cb_type.onesided.fct(ce, cb->cb_type.onesided.lreg, + cb->cb_type.onesided.ldispl, + cb->cb_type.onesided.rreg, + cb->cb_type.onesided.rdispl, + cb->cb_type.onesided.size, + cb->cb_type.onesided.remote, + cb->cb_data); + } + } else if (cb->type == MPI_FUNNELLED_TYPE_ONESIDED_MIMIC_AM) { + if(cb->cb_type.onesided_mimic_am.fct != NULL) { + ret = cb->cb_type.onesided_mimic_am.fct(ce, mpi_tag, cb->cb_type.onesided_mimic_am.msg, + length, mpi_source, cb->cb_data); + free(cb->cb_type.onesided_mimic_am.msg); + } + } else { + /* We only have three types */ + assert(0); + } + + return ret; +} + +static int +mpi_no_thread_push_posted_req(parsec_comm_engine_t *ce) +{ + (void) ce; + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + + mpi_funnelled_dynamic_req_t *item; + item = 
(mpi_funnelled_dynamic_req_t *) parsec_list_nolock_pop_front(&mpi_funnelled_dynamic_req_fifo); + + MPI_Request tmp = array_of_requests[mpi_funnelled_last_active_req]; + array_of_requests[mpi_funnelled_last_active_req] = item->request; + item->request = tmp; + item->request = MPI_REQUEST_NULL; + + array_of_callbacks[mpi_funnelled_last_active_req].storage1 = item->cb.storage1; + array_of_callbacks[mpi_funnelled_last_active_req].storage2 = item->cb.storage2; + array_of_callbacks[mpi_funnelled_last_active_req].cb_data = item->cb.cb_data; + array_of_callbacks[mpi_funnelled_last_active_req].type = item->cb.type; + array_of_callbacks[mpi_funnelled_last_active_req].tag = item->cb.tag; + + if(item->cb.type == MPI_FUNNELLED_TYPE_ONESIDED) { + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.fct = item->cb.cb_type.onesided.fct; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.lreg = item->cb.cb_type.onesided.lreg; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.ldispl = item->cb.cb_type.onesided.ldispl; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.rreg = item->cb.cb_type.onesided.rreg; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.rdispl = item->cb.cb_type.onesided.rdispl; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.size = item->cb.cb_type.onesided.size; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided.remote = item->cb.cb_type.onesided.remote; + } else if (item->cb.type == MPI_FUNNELLED_TYPE_ONESIDED_MIMIC_AM) { + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided_mimic_am.fct = + item->cb.cb_type.onesided_mimic_am.fct; + array_of_callbacks[mpi_funnelled_last_active_req].cb_type.onesided_mimic_am.msg = + item->cb.cb_type.onesided_mimic_am.msg; + } else { + /* No other types of callbacks should be postponed */ + assert(0); + } + + if(item->post_isend) { + mpi_funnelled_mem_reg_handle_t *ldata = 
(mpi_funnelled_mem_reg_handle_t *) item->cb.cb_type.onesided.lreg; + MPI_Isend((char *)ldata->mem + item->cb.cb_type.onesided.ldispl, ldata->count, + ldata->datatype, item->cb.cb_type.onesided.remote, item->cb.cb_type.onesided.size, dep_comm, + &array_of_requests[mpi_funnelled_last_active_req]); + } + + mpi_funnelled_last_active_req++; + + parsec_thread_mempool_free(mpi_funnelled_dynamic_req_mempool->thread_mempools, item); + + return 1; +} + +int +mpi_no_thread_progress(parsec_comm_engine_t *ce) +{ + MPI_Status *status; + int ret = 0, idx, outcount, pos; + mpi_funnelled_callback_t *cb; + int length; + + do { + MPI_Testsome(mpi_funnelled_last_active_req, array_of_requests, + &outcount, array_of_indices, array_of_statuses); + + if(0 == outcount) goto feed_more_work; /* can we push some more work? */ + + /* Trigger the callbacks */ + for( idx = 0; idx < outcount; idx++ ) { + cb = &array_of_callbacks[array_of_indices[idx]]; + status = &(array_of_statuses[idx]); + + MPI_Get_count(status, MPI_PACKED, &length); + + /* Serve the callback and comeback */ + mpi_no_thread_serve_cb(ce, cb, status->MPI_TAG, + status->MPI_SOURCE, length, + MPI_FUNNELLED_TYPE_AM == cb->type ? 
(void *)cb->tag->buf[cb->storage2] : NULL, + 1); + ret++; + } + + for( idx = outcount-1; idx >= 0; idx-- ) { + pos = array_of_indices[idx]; + if(MPI_REQUEST_NULL != array_of_requests[pos]) + continue; /* The callback replaced the completed request, keep going */ + assert(pos >= mpi_funnelled_static_req_idx); + /* Get the last active callback to replace the empty one */ + mpi_funnelled_last_active_req--; + if(mpi_funnelled_last_active_req > pos) { + array_of_requests[pos] = array_of_requests[mpi_funnelled_last_active_req]; + array_of_callbacks[pos] = array_of_callbacks[mpi_funnelled_last_active_req]; + } + array_of_requests[mpi_funnelled_last_active_req] = MPI_REQUEST_NULL; + } + + feed_more_work: + /* check completion of posted requests */ + while(mpi_funnelled_last_active_req < size_of_total_reqs && + !parsec_list_nolock_is_empty(&mpi_funnelled_dynamic_req_fifo)) { + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + mpi_no_thread_push_posted_req(ce); + } + if(0 == outcount) return ret; + } while(1); +} + +int +mpi_no_thread_enable(parsec_comm_engine_t *ce) +{ + (void) ce; + return 1; +} + +int +mpi_no_thread_disable(parsec_comm_engine_t *ce) +{ + (void) ce; + return 1; +} + +int +mpi_no_thread_pack(parsec_comm_engine_t *ce, + void *inbuf, int incount, parsec_datatype_t type, + void *outbuf, int outsize, + int *positionA) +{ + (void) ce; + return MPI_Pack(inbuf, incount, type, outbuf, outsize, positionA, dep_comm); + +} + +int +mpi_no_thread_pack_size(parsec_comm_engine_t *ce, + int incount, parsec_datatype_t type, + int* size) +{ + (void) ce; + return MPI_Pack_size(incount, type, dep_comm, size); +} +int +mpi_no_thread_unpack(parsec_comm_engine_t *ce, + void *inbuf, int insize, int *position, + void *outbuf, int outcount, parsec_datatype_t type) +{ + (void) ce; + return MPI_Unpack(inbuf, insize, position, outbuf, outcount, type, dep_comm); +} + +/* Mechanism to post global synchronization from upper layer */ +int 
+mpi_no_thread_sync(parsec_comm_engine_t *ce) +{ + (void) ce; + MPI_Barrier(dep_comm); + return 0; +} + +/* The upper layer will query the bottom layer before pushing + * additional one-sided messages. + */ +int +mpi_no_thread_can_push_more(parsec_comm_engine_t *ce) +{ + (void) ce; +#if 0 + /* Here we first push pending work before we decide and let the + * upper layer know if they should push more work or not + */ + /* Push saved requests first */ + while(mpi_funnelled_last_active_req < size_of_total_reqs && + !parsec_list_nolock_is_empty(&mpi_funnelled_dynamic_req_fifo)) { + assert(mpi_funnelled_last_active_req < size_of_total_reqs); + mpi_no_thread_progress_saved_req(ce); + } +#endif + + /* Do we have room to post more requests? */ + return mpi_funnelled_last_active_req < size_of_total_reqs; +} diff --git a/parsec/parsec_mpi_funnelled.h b/parsec/parsec_mpi_funnelled.h new file mode 100644 index 000000000..8cea73ea2 --- /dev/null +++ b/parsec/parsec_mpi_funnelled.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2009-2018 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ */ +#ifndef __USE_PARSEC_MPI_FUNNELLED_H__ +#define __USE_PARSEC_MPI_FUNNELLED_H__ + +#include "parsec/parsec_comm_engine.h" + +/* ------- Funnelled MPI implementation below ------- */ +parsec_comm_engine_t * mpi_funnelled_init(parsec_context_t *parsec_context); +int mpi_funnelled_fini(parsec_comm_engine_t *comm_engine); + +int mpi_no_thread_tag_register(parsec_ce_tag_t tag, + parsec_ce_am_callback_t cb, + void *cb_data, + size_t msg_length); + +int mpi_no_thread_tag_unregister(parsec_ce_tag_t tag); + +int +mpi_no_thread_mem_register(void *mem, parsec_mem_type_t mem_type, + size_t count, parsec_datatype_t datatype, + size_t mem_size, + parsec_ce_mem_reg_handle_t *lreg, + size_t *lreg_size); + +int mpi_no_thread_mem_unregister(parsec_ce_mem_reg_handle_t *lreg); + +int mpi_no_thread_get_mem_reg_handle_size(void); + +int mpi_no_thread_mem_retrieve(parsec_ce_mem_reg_handle_t lreg, void **mem, parsec_datatype_t *datatype, int *count); + +int mpi_no_thread_put(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); + +int mpi_no_thread_get(parsec_comm_engine_t *comm_engine, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + parsec_ce_onesided_callback_t l_cb, void *l_cb_data, + parsec_ce_tag_t r_tag, void *r_cb_data, size_t r_cb_data_size); + +int mpi_no_thread_send_active_message(parsec_comm_engine_t *comm_engine, + parsec_ce_tag_t tag, + int remote, + void *addr, size_t size); + +int mpi_no_thread_progress(parsec_comm_engine_t *comm_engine); + +int mpi_no_thread_enable(parsec_comm_engine_t *comm_engine); +int mpi_no_thread_disable(parsec_comm_engine_t *comm_engine); + +int mpi_no_thread_pack(parsec_comm_engine_t *ce, + void *inbuf, int incount, 
parsec_datatype_t type, + void *outbuf, int outsize, + int *positionA); + +int mpi_no_thread_pack_size(parsec_comm_engine_t *ce, + int incount, parsec_datatype_t type, + int *size); + +int mpi_no_thread_unpack(parsec_comm_engine_t *ce, + void *inbuf, int insize, int *position, + void *outbuf, int outcount, parsec_datatype_t type); + +int mpi_no_thread_sync(parsec_comm_engine_t *comm_engine); + +int +mpi_no_thread_can_push_more(parsec_comm_engine_t *c_e); + +#endif /* __USE_PARSEC_MPI_FUNNELLED_H__ */ diff --git a/parsec/remote_dep.c b/parsec/remote_dep.c index d579f678e..2216160fb 100644 --- a/parsec/remote_dep.c +++ b/parsec/remote_dep.c @@ -24,17 +24,17 @@ * - 3: communication thread is up but sleeping */ int parsec_communication_engine_up = -1; -static int parsec_comm_output_stream = 0; -static int parsec_comm_verbose = 0; +int parsec_comm_output_stream = 0; +int parsec_comm_verbose = 0; #ifdef DISTRIBUTED /* comm_yield mode: see valid values in the corresponding mca_register */ -static int comm_yield = 1; +int comm_yield = 1; /* comm_yield_duration (ns) */ -static int comm_yield_ns = 5000; +int comm_yield_ns = 5000; /* comm_thread_multiple: see values in the corresponding mca_register */ -static int parsec_param_comm_thread_multiple = -1; +int parsec_param_comm_thread_multiple = -1; static int remote_dep_bcast_star_child(int me, int him); #ifdef PARSEC_DIST_COLLECTIVES @@ -47,7 +47,7 @@ static int (*remote_dep_bcast_child)(int me, int him) = remote_dep_bcast_chainpi #define remote_dep_bcast_child(me, him) remote_dep_bcast_start_child(me, him) #endif -static int remote_dep_bind_thread(parsec_context_t* context); +int remote_dep_bind_thread(parsec_context_t* context); /* Clear the already forwarded remote dependency matrix */ static inline void @@ -90,7 +90,7 @@ remote_dep_is_forwarded(parsec_execution_stream_t* es, return (int) ((rdeps->remote_dep_fw_mask[boffset] & mask) != 0); } - +#if 0 /* make sure we don't leave before serving all data deps */ static inline 
void remote_dep_inc_flying_messages(parsec_taskpool_t* handle) @@ -105,10 +105,11 @@ remote_dep_dec_flying_messages(parsec_taskpool_t *handle) { (void)parsec_taskpool_update_runtime_nbtask(handle, -1); } +#endif /* Mark that ncompleted of the remote deps are finished, and return the remote dep to * the free items queue if it is now done */ -static inline int +int remote_dep_complete_and_cleanup(parsec_remote_deps_t** deps, int ncompleted) { @@ -142,7 +143,7 @@ remote_dep_complete_and_cleanup(parsec_remote_deps_t** deps, return 0; } -inline parsec_remote_deps_t* remote_deps_allocate( parsec_lifo_t* lifo ) +parsec_remote_deps_t* remote_deps_allocate( parsec_lifo_t* lifo ) { parsec_remote_deps_t* remote_deps = (parsec_remote_deps_t*)parsec_lifo_pop(lifo); uint32_t i, rank_bit_size; @@ -220,12 +221,14 @@ inline void remote_deps_free(parsec_remote_deps_t* deps) #endif +#if 0 #ifdef PARSEC_HAVE_MPI #include "remote_dep_mpi.c" #else #endif /* NO TRANSPORT */ +#endif #ifdef DISTRIBUTED @@ -586,7 +589,7 @@ int parsec_remote_dep_activate(parsec_execution_stream_t* es, } parsec_remote_dep_context_t parsec_remote_dep_context; -static int parsec_remote_dep_inited = 0; +int parsec_remote_dep_inited = 0; /* THIS FUNCTION MUST NOT BE CALLED WHILE REMOTE DEP IS ON. * NOT THREAD SAFE (AND SHOULD NOT BE) */ @@ -637,7 +640,7 @@ void remote_deps_allocation_fini(void) } /* Bind the communication thread on an unused core if possible */ -static int remote_dep_bind_thread(parsec_context_t* context) +int remote_dep_bind_thread(parsec_context_t* context) { #if defined(PARSEC_HAVE_HWLOC) && defined(PARSEC_HAVE_HWLOC_BITMAP) char *str = NULL; diff --git a/parsec/remote_dep.h b/parsec/remote_dep.h index 76cc9ed00..fd769cefe 100644 --- a/parsec/remote_dep.h +++ b/parsec/remote_dep.h @@ -1,22 +1,31 @@ /* - * Copyright (c) 2009-2019 The University of Tennessee and The University + * Copyright (c) 2009-2018 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. */ -#ifndef __PARSEC_USE_REMOTE_DEP_H__ -#define __PARSEC_USE_REMOTE_DEP_H__ +#ifndef __USE_PARSEC_REMOTE_DEP_H__ +#define __USE_PARSEC_REMOTE_DEP_H__ /** @addtogroup parsec_internal_communication * @{ */ + +#define PARSEC_REMOTE_DEP_USE_THREADS + +#include "parsec/bindthread.h" +#include "parsec/class/dequeue.h" #include "parsec/class/lifo.h" -#include "parsec/class/parsec_future.h" #include "parsec/parsec_description_structures.h" +#include "parsec/parsec_internal.h" +#include "parsec/parsec_comm_engine.h" +#include "parsec/scheduling.h" +#include "parsec/parsec_internal.h" -BEGIN_C_DECLS +typedef struct dep_cmd_item_s dep_cmd_item_t; +typedef union dep_cmd_u dep_cmd_t; -typedef ptrdiff_t remote_dep_datakey_t; +typedef unsigned long remote_dep_datakey_t; #define PARSEC_ACTION_DEPS_MASK 0x00FFFFFF #define PARSEC_ACTION_RELEASE_LOCAL_DEPS 0x01000000 @@ -29,33 +38,30 @@ typedef ptrdiff_t remote_dep_datakey_t; #define PARSEC_ACTION_RESHAPE_REMOTE_ON_RELEASE 0x80000000 #define PARSEC_ACTION_RELEASE_REMOTE_DEPS (PARSEC_ACTION_SEND_INIT_REMOTE_DEPS | PARSEC_ACTION_SEND_REMOTE_DEPS) -typedef struct remote_dep_wire_activate_s -{ +typedef enum { + REMOTE_DEP_ACTIVATE_TAG = 2, + REMOTE_DEP_GET_DATA_TAG, + REMOTE_DEP_PUT_END_TAG, + REMOTE_DEP_MAX_CTRL_TAG +} parsec_remote_dep_tag_t; + +typedef struct remote_dep_wire_activate_s { remote_dep_datakey_t deps; /**< a pointer to the dep structure on the source */ remote_dep_datakey_t output_mask; /**< the mask of the output dependencies satisfied by this activation message */ - remote_dep_datakey_t tag; uint32_t taskpool_id; uint32_t task_class_id; uint32_t length; parsec_assignment_t locals[MAX_LOCAL_COUNT]; } remote_dep_wire_activate_t; -typedef struct remote_dep_wire_get_s -{ - remote_dep_datakey_t deps; - remote_dep_datakey_t output_mask; - remote_dep_datakey_t tag; +typedef struct remote_dep_wire_get_s { + remote_dep_datakey_t source_deps; + remote_dep_datakey_t remote_callback_data; + 
remote_dep_datakey_t output_mask; + uintptr_t callback_fn; + parsec_ce_mem_reg_handle_t remote_memory_handle; } remote_dep_wire_get_t; -/** - * This structure holds the key information for any data movement. It contains the arena - * where the data is allocated from, or will be allocated from. It also contains the - * pointer to the buffer involved in the communication (or NULL if the data will be - * allocated before the reception). Finally, it contains the triplet allowing a correct send - * or receive operation: the memory layout, the number fo repetitions and the displacement - * from the data pointer where the operation will start. If the memory layout is NULL the - * one attached to the arena must be used instead. - */ struct parsec_dep_type_description_s { struct parsec_arena_s *arena; parsec_datatype_t src_datatype; @@ -66,6 +72,15 @@ struct parsec_dep_type_description_s { int64_t dst_displ; }; +/** + * This structure holds the key information for any data movement. It contains the arena + * where the data is allocated from, or will be allocated from. It also contains the + * pointer to the buffer involved in the communication (or NULL if the data will be + * allocated before the reception). Finally, it contains the triplet allowing a correct send + * or receive operation: the memory layout, the number of repetitions and the displacement + * from the data pointer where the operation will start. If the memory layout is NULL the + * one attached to the arena must be used instead.
+ */ struct parsec_dep_data_description_s { struct parsec_data_copy_s *data; struct parsec_dep_type_description_s local; @@ -86,7 +101,6 @@ struct parsec_dep_data_description_s { struct data_repo_s *repo; parsec_key_t repo_key; #endif - }; #define PARSEC_AVOID_RESHAPE_AFTER_RECEPTION 0x0F @@ -98,7 +112,7 @@ struct parsec_reshape_promise_description_s { * the same reshape promise (workaround comm engine) */ #endif uint32_t remote_recv_guard; /* Use to prevent re-reshaping after reception */ - }; +}; /* Callback to do a local reshaping of a datacopy */ void parsec_local_reshape(parsec_base_future_t *future, @@ -107,6 +121,7 @@ void parsec_local_reshape(parsec_base_future_t *future, parsec_task_t *task); + struct remote_dep_output_param_s { /** Never change this structure without understanding the * "subtle" relation with remote_deps_allocation_init in @@ -115,12 +130,12 @@ struct remote_dep_output_param_s { parsec_list_item_t super; parsec_remote_deps_t *parent; struct parsec_dep_data_description_s data; /**< The data propagated by this message. */ - uint32_t deps_mask; /**< A bitmask of all the output dependencies - propagated by this message. The bitmask uses - depedencies indexes not flow indexes. */ - int32_t priority; /**< the priority of the message */ - uint32_t count_bits; /**< The number of participants */ - uint32_t* rank_bits; /**< The array of bits representing the propagation path */ + uint32_t deps_mask; /**< A bitmask of all the output dependencies + propagated by this message. The bitmask uses + depedencies indexes not flow indexes. 
*/ + int32_t priority; /**< the priority of the message */ + uint32_t count_bits; /**< The number of participants */ + uint32_t* rank_bits; /**< The array of bits representing the propagation path */ }; struct parsec_remote_deps_s { @@ -133,6 +148,7 @@ struct parsec_remote_deps_s { uint32_t incoming_mask; /**< track all incoming actions (receives) */ uint32_t outgoing_mask; /**< track all outgoing actions (send) */ remote_dep_wire_activate_t msg; /**< A copy of the message control */ + void *eager_msg; /**< A pointer to the eager buffer if this is an eager msg, otherwise NULL */ int32_t max_priority; int32_t priority; uint32_t *remote_dep_fw_mask; /**< list of peers already notified about @@ -151,8 +167,12 @@ struct parsec_remote_deps_s { * - positive: the meaning is defined by the communication engine. */ extern int parsec_communication_engine_up; +extern int parsec_comm_output_stream; +extern int parsec_comm_verbose; +extern parsec_execution_stream_t parsec_comm_es; +extern int parsec_param_comm_thread_multiple; -#if defined(DISTRIBUTED) +#ifdef DISTRIBUTED typedef struct { parsec_lifo_t freelist; @@ -181,6 +201,7 @@ int parsec_remote_dep_fini(parsec_context_t* context); int parsec_remote_dep_on(parsec_context_t* context); int parsec_remote_dep_off(parsec_context_t* context); + /* Poll for remote completion of tasks that would enable some work locally */ int parsec_remote_dep_progress(parsec_execution_stream_t* es); @@ -195,22 +216,17 @@ int parsec_remote_dep_activate(parsec_execution_stream_t* es, /* Memcopy a particular data using datatype specification */ void parsec_remote_dep_memcpy(parsec_execution_stream_t* es, - parsec_taskpool_t* tp, - parsec_data_copy_t *dst, - parsec_data_copy_t *src, - parsec_dep_data_description_t* data); + parsec_taskpool_t* tp, + parsec_data_copy_t *dst, + parsec_data_copy_t *src, + parsec_dep_data_description_t* data); -/* This function adds a command in the commnad queue to activate +/* This function adds a command in the command 
queue to activate * release_deps of dep we had to delay in DTD runs. */ -int -remote_dep_dequeue_delayed_dep_release(parsec_remote_deps_t *deps); - -/* This function creates a fake eu for comm thread for profiling DTD runs */ -void -remote_dep_mpi_initialize_execution_stream(parsec_context_t *context); +int remote_dep_dequeue_delayed_dep_release(parsec_remote_deps_t *deps); -#ifdef PARSEC_DIST_COLLECTIVES +#if defined(PARSEC_DIST_COLLECTIVES) /* Propagate an activation order from the current node down the original tree */ int parsec_remote_dep_propagate(parsec_execution_stream_t* es, const parsec_task_t* task, @@ -225,7 +241,6 @@ int parsec_remote_dep_propagate(parsec_execution_stream_t* es, #define parsec_remote_dep_progress(ctx) 0 #define parsec_remote_dep_activate(ctx, o, r) -1 #define parsec_remote_dep_new_taskpool(ctx) 0 -#define remote_dep_mpi_initialize_execution_stream(ctx) 0 #endif /* DISTRIBUTED */ /* check if this data description represents a CTL dependency */ @@ -238,8 +253,186 @@ int parsec_remote_dep_propagate(parsec_execution_stream_t* es, #define parsec_set_CTL_dep(dep_data_desc)\ dep_data_desc.data = NULL; dep_data_desc.remote.src_datatype = PARSEC_DATATYPE_NULL; dep_data_desc.remote.src_count=0; -END_C_DECLS /** @} */ -#endif /* __PARSEC_USE_REMOTE_DEP_H__ */ +#define DEP_NB_CONCURENT 3 + +extern int parsec_comm_gets_max; +extern int parsec_comm_gets; +extern int parsec_comm_puts_max; +extern int parsec_comm_puts; + +/** + * The order is important as it will be used to compute the index in the + * pending array of messages. + */ +typedef enum dep_cmd_action_t { + DEP_ACTIVATE = -1, + DEP_NEW_TASKPOOL = 0, + DEP_MEMCPY, + DEP_MEMCPY_RESHAPE, + DEP_RELEASE, + DEP_DTD_DELAYED_RELEASE, + DEP_PUT_DATA, + DEP_GET_DATA, + DEP_CTL, + DEP_LAST /* always the last element. 
it should not be used */ +} dep_cmd_action_t; + +union dep_cmd_u { + struct { + remote_dep_wire_get_t task; + int peer; + parsec_ce_mem_reg_handle_t remote_memory_handle; + } activate; + struct { + parsec_remote_deps_t *deps; + } release; + struct { + int enable; + } ctl; + struct { + parsec_taskpool_t *tp; + } new_taskpool; + struct { + parsec_taskpool_t *taskpool; + parsec_data_copy_t *source; + parsec_data_copy_t *destination; + parsec_dep_type_description_t layout; + } memcpy; + struct { + parsec_taskpool_t *taskpool; + parsec_data_copy_t *source; + parsec_data_copy_t *destination; + parsec_dep_type_description_t layout; + parsec_task_t *task; + parsec_datacopy_future_t *future; + parsec_reshape_promise_description_t *dt; + } memcpy_reshape; + +}; + +struct dep_cmd_item_s { + parsec_list_item_t super; + parsec_list_item_t pos_list; + dep_cmd_action_t action; + int priority; + dep_cmd_t cmd; +}; + +#define dep_cmd_prio (offsetof(dep_cmd_item_t, priority)) +#define dep_mpi_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) +#define rdep_prio (offsetof(parsec_remote_deps_t, max_priority)) + +/** + * These functions will be inherited from the current remote_dep_mpi.c + * and for the time being will remain in there.
+ */ +void* remote_dep_dequeue_main(parsec_context_t* context); +int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp); + +int remote_dep_dequeue_init(parsec_context_t* context); +int remote_dep_dequeue_fini(parsec_context_t* context); + +int remote_dep_dequeue_send(parsec_execution_stream_t* es, int rank, + parsec_remote_deps_t* deps); + +int remote_dep_dequeue_on(parsec_context_t* context); +int remote_dep_dequeue_off(parsec_context_t* context); +# define remote_dep_init(ctx) remote_dep_dequeue_init(ctx) +# define remote_dep_fini(ctx) remote_dep_dequeue_fini(ctx) +# define remote_dep_on(ctx) remote_dep_dequeue_on(ctx) +# define remote_dep_off(ctx) remote_dep_dequeue_off(ctx) +# define remote_dep_new_taskpool(tp) remote_dep_dequeue_new_taskpool(tp) +# define remote_dep_send(es, rank, deps) remote_dep_dequeue_send(es, rank, deps) +# define remote_dep_progress(es, cycles) remote_dep_dequeue_nothread_progress(es, cycles) + +int remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, int cycles); + +int remote_dep_bind_thread(parsec_context_t* context); +int remote_dep_complete_and_cleanup(parsec_remote_deps_t** deps, + int ncompleted); + +/* comm_yield mode: see valid values in the corresponding mca_register */ +extern int comm_yield; +/* comm_yield_duration (ns) */ +extern int comm_yield_ns; + +/* make sure we don't leave before serving all data deps */ +static inline void +remote_dep_inc_flying_messages(parsec_taskpool_t* handle) +{ + assert( handle->nb_pending_actions > 0 ); + (void)parsec_atomic_fetch_inc_int32( &(handle->nb_pending_actions) ); +} + +/* allow for termination when all deps have been served */ +static inline void +remote_dep_dec_flying_messages(parsec_taskpool_t *handle) +{ + (void)parsec_taskpool_update_runtime_nbtask(handle, -1); +} + +int remote_dep_set_ctx(parsec_context_t* context, intptr_t opaque_comm_ctx ); + +parsec_remote_deps_t* remote_deps_allocate( parsec_lifo_t* lifo ); + +void remote_deps_allocation_init(int np, int 
max_output_deps); + +typedef struct { + int rank_src; // 0 + int rank_dst; // 4 + uint64_t tid; // 8 + uint32_t tpid; // 16 + uint32_t tcid; // 20- + int32_t msg_size; // 24 + int32_t padding; // 28 -- this field is not necessary, but the structure will be padded + // by the compiler due to the uint64_t field. It is declared here + // just to be consistent with the conversion string. +} parsec_profile_remote_dep_mpi_info_t; // 32 bytes + + +#ifdef PARSEC_PROF_TRACE +extern int MPI_Activate_sk, MPI_Activate_ek; +extern int MPI_Data_ctl_sk, MPI_Data_ctl_ek; +extern int MPI_Data_plds_sk, MPI_Data_plds_ek; +extern int MPI_Data_pldr_sk, MPI_Data_pldr_ek; +extern int activate_cb_trace_sk, activate_cb_trace_ek; +extern int put_cb_trace_sk, put_cb_trace_ek; + +// TODO: how to replace call to MPI_Pack_size? +#define TAKE_TIME_WITH_INFO(PROF, KEY, I, src, dst, rdw, nbdtt, dtt, comm) \ + if( parsec_profile_enabled ) { \ + parsec_profile_remote_dep_mpi_info_t __info; \ + parsec_taskpool_t *__tp = parsec_taskpool_lookup( (rdw).taskpool_id ); \ + const parsec_task_class_t *__tc = __tp->task_classes_array[(rdw).task_class_id ]; \ + __info.rank_src = (src); \ + __info.rank_dst = (dst); \ + __info.tpid = __tp->taskpool_id; \ + __info.tcid = (rdw).task_class_id; \ + __info.tid = __tc->key_functions->key_hash( \ + __tc->make_key(__tp, (rdw).locals), NULL); \ + MPI_Pack_size(nbdtt, dtt, comm, &__info.msg_size); \ + PARSEC_PROFILING_TRACE((PROF), (KEY), (I), \ + PROFILE_OBJECT_ID_NULL, &__info); \ + } + +#define TAKE_TIME(PROF, KEY, I) PARSEC_PROFILING_TRACE((PROF), (KEY), (I), PROFILE_OBJECT_ID_NULL, NULL) + +#else +#define TAKE_TIME_WITH_INFO(PROF, KEY, I, src, dst, rdw, count, dtt, comm) do {} while(0) +#define TAKE_TIME(PROF, KEY, I) do {} while(0) +#endif /* PARSEC_PROF_TRACE */ + +char* +remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin, + char* str, + size_t len); + +extern int parsec_comm_gets_max; +extern int parsec_comm_gets; +extern int parsec_comm_puts_max; 
+extern int parsec_comm_puts; + +#endif /* __USE_PARSEC_REMOTE_DEP_H__ */ diff --git a/parsec/remote_dep_mpi.c b/parsec/remote_dep_mpi.c index 731bfa56e..15515f2ec 100644 --- a/parsec/remote_dep_mpi.c +++ b/parsec/remote_dep_mpi.c @@ -1,23 +1,18 @@ -/* - * Copyright (c) 2009-2020 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - */ - -/* /!\ THIS FILE IS NOT INTENDED TO BE COMPILED ON ITS OWN - * It should be included from remote_dep.c if PARSEC_HAVE_MPI is defined - */ #include "parsec/parsec_config.h" #include #include "profiling.h" #include "parsec/class/list.h" #include "parsec/utils/output.h" +#include "parsec/utils/mca_param.h" #include "parsec/utils/debug.h" #include "parsec/debug_marks.h" #include "parsec/data.h" #include "parsec/papi_sde.h" #include "parsec/interfaces/dtd/insert_function_internal.h" +#include "parsec/remote_dep.h" +#include "parsec/class/dequeue.h" + #include "parsec/parsec_binary_profile.h" #include "parsec/parsec_internal.h" @@ -26,67 +21,13 @@ static int64_t count_reshaping = 0; #endif -#define PARSEC_REMOTE_DEP_USE_THREADS - #define PARSEC_DTD_SKIP_SAVING -1 -static char **dep_activate_buff; - -typedef struct dep_cmd_item_s dep_cmd_item_t; -typedef union dep_cmd_u dep_cmd_t; - -static int remote_dep_mpi_setup(parsec_context_t* context); -static int remote_dep_mpi_cleanup(parsec_context_t* context); -static int remote_dep_mpi_fini(parsec_context_t* context); -static int remote_dep_mpi_on(parsec_context_t* context); -static int remote_dep_mpi_progress(parsec_execution_stream_t* es); -static int remote_dep_get_datatypes(parsec_execution_stream_t* es, - parsec_remote_deps_t* origin, - int storage_id, int *position); -static parsec_remote_deps_t* -remote_dep_release_incoming(parsec_execution_stream_t* es, - parsec_remote_deps_t* origin, - remote_dep_datakey_t complete_mask); - -static int remote_dep_nothread_send(parsec_execution_stream_t* es, - dep_cmd_item_t **head_item); 
-static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, - dep_cmd_item_t *item); -static int local_dep_nothread_reshape(parsec_execution_stream_t* es, - dep_cmd_item_t *item); - -static int remote_dep_dequeue_send(parsec_execution_stream_t* es, int rank, parsec_remote_deps_t* deps); -static int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp); -#ifdef PARSEC_REMOTE_DEP_USE_THREADS -static int remote_dep_dequeue_init(parsec_context_t* context); -static int remote_dep_dequeue_fini(parsec_context_t* context); -static int remote_dep_dequeue_on(parsec_context_t* context); -static int remote_dep_dequeue_off(parsec_context_t* context); -/*static int remote_dep_dequeue_progress(parsec_execution_stream_t* es);*/ -# define remote_dep_init(ctx) remote_dep_dequeue_init(ctx) -# define remote_dep_fini(ctx) remote_dep_dequeue_fini(ctx) -# define remote_dep_on(ctx) remote_dep_dequeue_on(ctx) -# define remote_dep_off(ctx) remote_dep_dequeue_off(ctx) -# define remote_dep_new_taskpool(tp) remote_dep_dequeue_new_taskpool(tp) -# define remote_dep_send(es, rank, deps) remote_dep_dequeue_send(es, rank, deps) -# define remote_dep_progress(es, cycles) remote_dep_dequeue_nothread_progress(es, cycles) - -#else -static int remote_dep_dequeue_nothread_init(parsec_context_t* context); -static int remote_dep_dequeue_nothread_fini(parsec_context_t* context); -# define remote_dep_init(ctx) remote_dep_dequeue_nothread_init(ctx) -# define remote_dep_fini(ctx) remote_dep_dequeue_nothread_fini(ctx) -# define remote_dep_on(ctx) remote_dep_mpi_on(ctx) -# define remote_dep_off(ctx) 0 -# define remote_dep_new_taskpool(tp) remote_dep_dequeue_new_taskpool(tp) -# define remote_dep_send(es, rank, deps) remote_dep_dequeue_send(es, rank, deps) -# define remote_dep_progress(es, cycles) remote_dep_dequeue_nothread_progress(es, cycles) -#endif -static int remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, int cycles); - -#include "parsec/class/dequeue.h" +int 
parsec_comm_gets_max = DEP_NB_CONCURENT * MAX_PARAM_COUNT; +int parsec_comm_gets = 0; +int parsec_comm_puts_max = DEP_NB_CONCURENT * MAX_PARAM_COUNT; +int parsec_comm_puts = 0; -#include "parsec/utils/mca_param.h" /** * Number of data movements to be extracted at each step. Bigger the number * larger the amount spent in ordering the tasks, but greater the potential @@ -94,115 +35,29 @@ static int remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, i */ static void remote_dep_mpi_params(parsec_context_t* context); static int parsec_param_nb_tasks_extracted = 20; -/* For the meaning of aggregate, overtake, and short, refer to the - * param register help text for comm_aggregate, comm_overtake, and +/* For the meaning of aggregate, short and eager, refer to the + * param register help text for comm_aggregate, and * comm_short_limit respectively. */ static size_t parsec_param_short_limit = RDEP_MSG_SHORT_LIMIT; -static int parsec_param_enable_aggregate = 1; -#if defined(PARSEC_HAVE_MPI_OVERTAKE) -static int parsec_param_enable_mpi_overtake = 1; -#endif - -#define DEP_NB_CONCURENT 3 -static int DEP_NB_REQ; - -static int parsec_comm_activations_max = 2*DEP_NB_CONCURENT; -static int parsec_comm_data_get_max = 2*DEP_NB_CONCURENT; -static int parsec_comm_gets_max = DEP_NB_CONCURENT * MAX_PARAM_COUNT; -static int parsec_comm_gets = 0; -static int parsec_comm_puts_max = DEP_NB_CONCURENT * MAX_PARAM_COUNT; -static int parsec_comm_puts = 0; -static int parsec_comm_last_active_req = 0; +static int parsec_param_enable_aggregate = 0; -/* The internal communicator used by the communication engine to host its requests and - * other operations. It is a copy of the context->comm_ctx (which is a duplicate of - * whatever the user provides). 
- */ -static MPI_Comm dep_comm = MPI_COMM_NULL; -/* The internal communicator for all intra-node communications */ -static MPI_Comm dep_self = MPI_COMM_NULL; - -/** - * The order is important as it will be used to compute the index in the - * pending array of messages. - */ -typedef enum dep_cmd_action_t { - DEP_ACTIVATE = -1, - DEP_NEW_TASKPOOL = 0, - DEP_MEMCPY, - DEP_MEMCPY_RESHAPE, - DEP_RELEASE, - DEP_DTD_DELAYED_RELEASE, -/* DEP_PROGRESS, */ - DEP_PUT_DATA, - DEP_GET_DATA, - DEP_CTL, - DEP_LAST /* always the last element. it shoud not be used */ -} dep_cmd_action_t; - -union dep_cmd_u { - struct { - remote_dep_wire_get_t task; - int peer; - } activate; - struct { - parsec_remote_deps_t *deps; - } release; - struct { - int enable; - } ctl; - struct { - parsec_taskpool_t *tp; - } new_taskpool; - struct { - parsec_taskpool_t *taskpool; - parsec_data_copy_t *source; - parsec_data_copy_t *destination; - parsec_dep_type_description_t layout; - } memcpy; - struct { - parsec_taskpool_t *taskpool; - parsec_data_copy_t *source; - parsec_data_copy_t *destination; - parsec_dep_type_description_t layout; - parsec_task_t *task; - parsec_datacopy_future_t *future; - parsec_reshape_promise_description_t *dt; - } memcpy_reshape; -}; +parsec_mempool_t *parsec_remote_dep_cb_data_mempool; -struct dep_cmd_item_s { - parsec_list_item_t super; - parsec_list_item_t pos_list; - dep_cmd_action_t action; - int priority; - dep_cmd_t cmd; -}; -#define dep_cmd_prio (offsetof(dep_cmd_item_t, priority)) -#define dep_mpi_pos_list (offsetof(dep_cmd_item_t, priority) - offsetof(dep_cmd_item_t, pos_list)) -#define rdep_prio (offsetof(parsec_remote_deps_t, max_priority)) +typedef struct remote_dep_cb_data_s { + parsec_list_item_t super; + parsec_thread_mempool_t *mempool_owner; + parsec_remote_deps_t *deps; /* always local */ + parsec_ce_mem_reg_handle_t memory_handle; + int k; +} remote_dep_cb_data_t; -typedef struct parsec_comm_callback_s parsec_comm_callback_t; +PARSEC_DECLSPEC 
PARSEC_OBJ_CLASS_DECLARATION(remote_dep_cb_data_t); -static int -remote_dep_mpi_save_put_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, MPI_Status* status); -static void remote_dep_mpi_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); -static int remote_dep_mpi_put_end_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, MPI_Status* status); -static int remote_dep_mpi_save_activate_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, MPI_Status* status); -static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); -static void remote_dep_mpi_get_end( parsec_execution_stream_t* es, int idx, parsec_remote_deps_t* deps ); -static int -remote_dep_mpi_get_end_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, MPI_Status* status); -static void remote_dep_mpi_new_taskpool( parsec_execution_stream_t* es, dep_cmd_item_t *item ); -static void remote_dep_mpi_release_delayed_deps( parsec_execution_stream_t* es, - dep_cmd_item_t *item ); +PARSEC_OBJ_CLASS_INSTANCE(remote_dep_cb_data_t, parsec_list_item_t, + NULL, NULL); -extern char* +char* remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin, char* str, size_t len) @@ -218,6 +73,18 @@ remote_dep_cmd_to_string(remote_dep_wire_activate_t* origin, return parsec_task_snprintf(str, len, &task); } +/* TODO: fix heterogeneous restriction by using proper mpi datatypes */ +#define dep_dtt parsec_datatype_int8_t +#define dep_count sizeof(remote_dep_wire_activate_t) +#define dep_extent dep_count +#define DEP_SHORT_BUFFER_SIZE (dep_extent+RDEP_MSG_SHORT_LIMIT) +#if PARSEC_SIZEOF_VOID_P == 4 +#define datakey_dtt parsec_datatype_int32_t +#else +#define datakey_dtt parsec_datatype_int64_t +#endif +#define datakey_count 3 + static pthread_t dep_thread_id; parsec_dequeue_t dep_cmd_queue; parsec_list_t dep_cmd_fifo; /* ordered non threaded fifo */ @@ -231,14 +98,13 @@ parsec_list_t dep_put_fifo; /* ordered non threaded fifo */ static 
dep_cmd_item_t** parsec_mpi_same_pos_items; static int parsec_mpi_same_pos_items_size = 0; -static void *remote_dep_dequeue_main(parsec_context_t* context); static int mpi_initialized = 0; #if defined(PARSEC_REMOTE_DEP_USE_THREADS) static pthread_mutex_t mpi_thread_mutex; static pthread_cond_t mpi_thread_condition; #endif -static parsec_execution_stream_t parsec_comm_es = { +parsec_execution_stream_t parsec_comm_es = { .th_id = 0, .core_id = -1, .socket_id = -1, @@ -262,12 +128,65 @@ static parsec_execution_stream_t parsec_comm_es = { .datarepo_mempools = {0} }; +static void remote_dep_mpi_put_start(parsec_execution_stream_t* es, dep_cmd_item_t* item); +static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, parsec_remote_deps_t* deps); + +static void remote_dep_mpi_get_end(parsec_execution_stream_t* es, + int idx, + parsec_remote_deps_t* deps); + +static int +remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data); + +static int +remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data); + +static parsec_remote_deps_t* +remote_dep_release_incoming(parsec_execution_stream_t* es, + parsec_remote_deps_t* origin, + remote_dep_datakey_t complete_mask); + +static int remote_dep_nothread_send(parsec_execution_stream_t* es, + dep_cmd_item_t **head_item); +static int remote_dep_ce_init(parsec_context_t* context); +static int remote_dep_ce_fini(parsec_context_t* context); + +static int local_dep_nothread_reshape(parsec_execution_stream_t* es, + dep_cmd_item_t *item); + +static int remote_dep_mpi_on(parsec_context_t* context); + +static int remote_dep_mpi_progress(parsec_execution_stream_t* es); + +static void remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, + dep_cmd_item_t *dep_cmd_item); + +static void 
remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, + dep_cmd_item_t *item); + +/* Perform a memcpy with datatypes by doing a local sendrecv */ +static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, + dep_cmd_item_t *item); + + /** * Store the user provided communicator in the PaRSEC context. We need to make a * copy to make sure the communicator does not disappear before the communication * engine starts up. */ -static int remote_dep_set_ctx(parsec_context_t* context, intptr_t opaque_comm_ctx ) +int remote_dep_set_ctx(parsec_context_t* context, intptr_t opaque_comm_ctx ) { MPI_Comm comm; int rc; @@ -318,12 +237,26 @@ static int remote_dep_set_ctx(parsec_context_t* context, intptr_t opaque_comm_ct return (MPI_SUCCESS == rc) ? PARSEC_SUCCESS : PARSEC_ERROR; } -static int remote_dep_dequeue_init(parsec_context_t* context) +static void remote_dep_mpi_params(parsec_context_t* context) { + (void)context; +#if RDEP_MSG_SHORT_LIMIT != 0 + parsec_mca_param_reg_sizet_name("runtime", "comm_short_limit", "Controls the maximum size of a short message. Short messages contain both the control message notifying the completion of a task and the associated data that fit completely in that buffer length. 
The maximum size of a short message should be lower than the network MTU.", + false, false, parsec_param_short_limit, &parsec_param_short_limit); + if(parsec_param_short_limit > RDEP_MSG_SHORT_LIMIT) { + parsec_warning("User requested a short message limit of %d which is greater than compiled in limit %d; value reset to compiled limit %d", parsec_param_short_limit, RDEP_MSG_SHORT_LIMIT, RDEP_MSG_SHORT_LIMIT); + parsec_param_short_limit = RDEP_MSG_SHORT_LIMIT; + } +#endif + parsec_mca_param_reg_int_name("runtime", "comm_aggregate", "Aggregate multiple dependencies in the same short message (1=true,0=false).", + false, false, parsec_param_enable_aggregate, &parsec_param_enable_aggregate); +} + +int +remote_dep_dequeue_init(parsec_context_t* context) { pthread_attr_t thread_attr; int is_mpi_up = 0; int thread_level_support; - MPI_Comm comm; assert(mpi_initialized == 0); @@ -355,25 +288,12 @@ static int remote_dep_dequeue_init(parsec_context_t* context) thread_level_support == MPI_THREAD_SINGLE ? "MPI_THREAD_SINGLE" : "MPI_THREAD_FUNNELED"); } -#if defined(PARSEC_HAVE_MPI_OVERTAKE) - parsec_mca_param_reg_int_name("runtime", "comm_mpi_overtake", "Lets MPI allow overtaking of messages (if applicable). 
(0: no, 1: yes)", - false, false, parsec_param_enable_mpi_overtake, &parsec_param_enable_mpi_overtake); - if( -1 == context->comm_ctx ) { - MPI_Info no_order; - MPI_Info_create(&no_order); - if( parsec_param_enable_mpi_overtake ) { - MPI_Info_set(no_order, "mpi_assert_allow_overtaking", "true"); - } - MPI_Comm_dup_with_info(MPI_COMM_WORLD, no_order, &comm); - MPI_Info_free(&no_order); - context->comm_ctx = (intptr_t)comm; - } -#else // defined(PARSEC_HAVE_MPI_OVERTAKE) if( -1 == context->comm_ctx ) { + MPI_Comm comm; MPI_Comm_dup(MPI_COMM_WORLD, &comm); context->comm_ctx = (intptr_t)comm; } -#endif // defined(PARSEC_HAVE_MPI_OVERTAKE) + assert(-1 != context->comm_ctx /* -1 reserved for non-initialized */); MPI_Comm_size( (MPI_Comm)context->comm_ctx, (int*)&(context->nb_nodes)); @@ -383,7 +303,7 @@ static int remote_dep_dequeue_init(parsec_context_t* context) } else if(parsec_param_comm_thread_multiple != -1) { parsec_warning("Requested multithreaded access to the communication engine, but MPI is not initialized with MPI_THREAD_MULTIPLE.\n" - "\t* PaRSEC will continue with the funneled thread communication engine model.\n"); + "\t* PaRSEC will continue with the funneled thread communication engine model.\n"); } } @@ -400,10 +320,10 @@ static int remote_dep_dequeue_init(parsec_context_t* context) /* From now on the communication capabilities are enabled */ parsec_communication_engine_up = 1; if(context->nb_nodes == 1) { - /* We're all by ourselves. In case we need to use MPI to handle data copies - * between different formats let's setup local MPI support. + /* We're all by ourselves. In case we need to use comm engine to handle data copies + * between different formats let's set it up.
*/ - remote_dep_mpi_on(context); + remote_dep_ce_init(context); goto up_and_running; } @@ -437,7 +357,8 @@ static int remote_dep_dequeue_init(parsec_context_t* context) return context->nb_nodes; } -static int remote_dep_dequeue_fini(parsec_context_t* context) +int +remote_dep_dequeue_fini(parsec_context_t* context) { if( 0 == mpi_initialized ) return 0; (void)context; @@ -466,7 +387,7 @@ static int remote_dep_dequeue_fini(parsec_context_t* context) assert((parsec_context_t*)ret == context); } else if ( parsec_communication_engine_up == 1 ) { - remote_dep_mpi_fini(context); + remote_dep_ce_fini(context); } assert(NULL == parsec_dequeue_pop_front(&dep_cmd_queue)); @@ -488,7 +409,8 @@ static int remote_dep_dequeue_fini(parsec_context_t* context) * the order is enqueued but the thread is not yet on, and 3 if the thread is * running. */ -static int remote_dep_dequeue_on(parsec_context_t* context) +int +remote_dep_dequeue_on(parsec_context_t* context) { /* If we are the only participant in this execution, we should not have to * communicate with any other process. However, we might have to execute all @@ -504,6 +426,7 @@ static int remote_dep_dequeue_on(parsec_context_t* context) parsec_communication_engine_up = 2; pthread_cond_signal(&mpi_thread_condition); pthread_mutex_unlock(&mpi_thread_mutex); + /* The waking up of the communication thread happen asynchronously, once the thread * receives the signal. At that point it acquires the mpi_thread_mutex and set the * global variable parsec_communication_engine_up to 3. 
@@ -519,7 +442,8 @@ static int remote_dep_dequeue_on(parsec_context_t* context) return 1; } -static int remote_dep_dequeue_off(parsec_context_t* context) +int +remote_dep_dequeue_off(parsec_context_t* context) { if(parsec_communication_engine_up < 2) return -1; /* The start order has not been issued */ @@ -542,29 +466,50 @@ static int remote_dep_dequeue_off(parsec_context_t* context) return 0; } -#include "parsec/bindthread.h" - -#if defined PARSEC_PROF_TRACE -static void remote_dep_mpi_profiling_init(void); -#else -#define remote_dep_mpi_profiling_init() do {} while(0) -#endif +static void +remote_dep_mpi_initialize_execution_stream(parsec_context_t *context) +{ + memcpy(&parsec_comm_es, context->virtual_processes[0]->execution_streams[0], sizeof(parsec_execution_stream_t)); +} -static void* remote_dep_dequeue_main(parsec_context_t* context) +void* remote_dep_dequeue_main(parsec_context_t* context) { int whatsup; remote_dep_bind_thread(context); PARSEC_PAPI_SDE_THREAD_INIT(); + remote_dep_ce_init(context); + /* Now synchronize with the main thread */ pthread_mutex_lock(&mpi_thread_mutex); pthread_cond_signal(&mpi_thread_condition); /* This is the main loop. Wait until being woken up by the main thread, do * the MPI stuff until we get the OFF or FINI commands. Then react the them. + * However, the first time do the delayed initialization that could not have + * been done before due to the lack of other component initialization. 
*/ - do { + + /* Let's wait until we are awaken */ + pthread_cond_wait(&mpi_thread_condition, &mpi_thread_mutex); + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine ON on process %d/%d", + context->my_rank, context->nb_nodes); + /* The MPI thread is owning the lock */ + assert( parsec_communication_engine_up == 2 ); + + /* Lazy or delayed initializations */ + remote_dep_mpi_initialize_execution_stream(context); + + remote_dep_mpi_on(context); + /* acknoledge the activation */ + parsec_communication_engine_up = 3; + whatsup = remote_dep_dequeue_nothread_progress(&parsec_comm_es, -1 /* loop till explicitly asked to return */); + PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine OFF on process %d/%d", + context->my_rank, context->nb_nodes); + parsec_communication_engine_up = 1; /* went to sleep */ + + while( -1 != whatsup ) { /* Let's wait until we are awaken */ pthread_cond_wait(&mpi_thread_condition, &mpi_thread_mutex); PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine ON on process %d/%d", @@ -578,16 +523,16 @@ static void* remote_dep_dequeue_main(parsec_context_t* context) PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: comm engine OFF on process %d/%d", context->my_rank, context->nb_nodes); parsec_communication_engine_up = 1; /* went to sleep */ - } while(-1 != whatsup); + } /* Release all resources */ - remote_dep_mpi_fini(context); + remote_dep_ce_fini(context); PARSEC_PAPI_SDE_THREAD_FINI(); return (void*)context; } -static int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) +int remote_dep_dequeue_new_taskpool(parsec_taskpool_t* tp) { if(!mpi_initialized) return 0; remote_dep_inc_flying_messages(tp); @@ -613,17 +558,21 @@ remote_dep_dequeue_delayed_dep_release(parsec_remote_deps_t *deps) return 1; } -static int remote_dep_dequeue_send(parsec_execution_stream_t* es, int rank, - parsec_remote_deps_t* deps) +int +remote_dep_dequeue_send(parsec_execution_stream_t* es, int rank, + 
parsec_remote_deps_t* deps) { dep_cmd_item_t* item = (dep_cmd_item_t*) calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_ACTIVATE; item->priority = deps->max_priority; item->cmd.activate.peer = rank; - item->cmd.activate.task.deps = (remote_dep_datakey_t)deps; + item->cmd.activate.task.source_deps = (remote_dep_datakey_t)deps; item->cmd.activate.task.output_mask = 0; - item->cmd.activate.task.tag = 0; + item->cmd.activate.task.callback_fn = 0; + item->cmd.activate.task.remote_memory_handle = NULL; /* we don't have it yet */ + item->cmd.activate.task.remote_callback_data = (remote_dep_datakey_t)NULL; + /* if MPI is multithreaded do not thread-shift the send activate */ if( parsec_comm_es.virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT ) { parsec_list_item_singleton(&item->pos_list); /* NOTE: this disables aggregation in MT cases. */ @@ -636,41 +585,34 @@ static int remote_dep_dequeue_send(parsec_execution_stream_t* es, int rank, } void parsec_remote_dep_memcpy(parsec_execution_stream_t* es, - parsec_taskpool_t* tp, - parsec_data_copy_t *dst, - parsec_data_copy_t *src, - parsec_dep_data_description_t* data) + parsec_taskpool_t* tp, + parsec_data_copy_t *dst, + parsec_data_copy_t *src, + parsec_dep_data_description_t* data) { assert( dst ); - - /* if MPI is multithreaded do not thread-shift the sendrecv */ - if( es->virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT ) { - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, - "COPY [in content] LOCAL DATA from %p[%p] to %p[%p] count %d", - (char*)PARSEC_DATA_COPY_GET_PTR(src) + data->local.src_displ, data->local.src_datatype, - (char*)PARSEC_DATA_COPY_GET_PTR(dst) + data->local.dst_displ, data->local.dst_datatype, - data->local.dst_count); - MPI_Sendrecv((char*)PARSEC_DATA_COPY_GET_PTR(src) + data->local.src_displ, - data->local.src_count, data->local.src_datatype, 0, es->th_id, - (char*)PARSEC_DATA_COPY_GET_PTR(dst) + 
data->local.dst_displ, - data->local.dst_count, data->local.dst_datatype, 0, es->th_id, - dep_self, MPI_STATUS_IGNORE); - return; + /* if the communication engine supports multithreads do the reshaping in place */ + if( parsec_ce.parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT ) { + if( 0 == parsec_ce.reshape(&parsec_ce, es, + dst, data->local.dst_displ, data->local.dst_datatype, data->local.dst_count, + src, data->local.src_displ, data->local.src_datatype, data->local.src_count) ) { + return; + } } PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "create MEMCPY request from %p to %p count %d", - (char*)PARSEC_DATA_COPY_GET_PTR(src) + data->local.src_displ, - (char*)PARSEC_DATA_COPY_GET_PTR(dst) + data->local.dst_displ, - data->local.dst_count); + PARSEC_DATA_COPY_GET_PTR(src) + data->local.src_displ, + PARSEC_DATA_COPY_GET_PTR(dst) + data->local.dst_displ, + data->local.src_count); dep_cmd_item_t* item = (dep_cmd_item_t*)calloc(1, sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(item, parsec_list_item_t); item->action = DEP_MEMCPY; item->priority = 0; - item->cmd.memcpy.taskpool = tp; - item->cmd.memcpy.source = src; - item->cmd.memcpy.destination = dst; - item->cmd.memcpy.layout = data->local; + item->cmd.memcpy.taskpool = tp; + item->cmd.memcpy.source = src; + item->cmd.memcpy.destination = dst; + item->cmd.memcpy.layout = data->local; PARSEC_OBJ_RETAIN(src); remote_dep_inc_flying_messages(tp); @@ -774,11 +716,9 @@ void parsec_local_reshape(parsec_base_future_t *future, es->th_id, dt->data, dt->data->dtt, type_name_src, reshape_data, dt->local->dst_datatype, type_name_dst, task_string, future); - MPI_Sendrecv((char*)PARSEC_DATA_COPY_GET_PTR(dt->data) + dt->local->src_displ, dt->local->src_count, dt->local->src_datatype, - 0, es->th_id, - (char*)PARSEC_DATA_COPY_GET_PTR(reshape_data) + dt->local->dst_displ, dt->local->dst_count, dt->local->dst_datatype, - 0, es->th_id, - dep_self, MPI_STATUS_IGNORE); + parsec_ce.reshape(&parsec_ce, es, + reshape_data, 
dt->local->dst_displ, dt->local->dst_datatype, dt->local->dst_count, + dt->data, dt->local->src_displ, dt->local->src_datatype, dt->local->src_count); parsec_future_set(future, reshape_data); @@ -890,8 +830,9 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, PARSEC_DEBUG_VERBOSE(30, parsec_comm_output_stream, "MPI: retrieve dtt for %s [dep_datatype_index %x] DTT: old %s new %s (%p) --> PACKED", newcontext->task_class->name, dep->dep_datatype_index, type_name_src, type_name_dst, output->data.remote.dst_datatype); #endif + // TODO JS: implement MPI_Pack_size int dsize; - MPI_Pack_size(output->data.remote.dst_count, output->data.remote.dst_datatype, dep_comm, &dsize); + MPI_Pack_size(output->data.remote.dst_count, output->data.remote.dst_datatype, MPI_COMM_WORLD, &dsize); output->data.remote.src_count = output->data.remote.dst_count = dsize; output->data.remote.src_datatype = output->data.remote.dst_datatype = PARSEC_DATATYPE_PACKED; @@ -922,7 +863,6 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, parsec_remote_deps_t* origin, int storage_id, int *position) { - parsec_task_t task; uint32_t i, j, k, local_mask = 0; assert(NULL == origin->taskpool); @@ -930,10 +870,6 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, if( NULL == origin->taskpool ) return -1; /* the parsec taskpool doesn't exist yet */ - task.taskpool = origin->taskpool; - /* Do not set the task.task_class here, because it might trigger a race condition in DTD */ - task.priority = 0; /* unknown yet */ - /* This function is divided into DTD and PTG's logic */ if( PARSEC_TASKPOOL_TYPE_DTD == origin->taskpool->taskpool_type ) { parsec_dtd_taskpool_t *dtd_tp = NULL; @@ -977,7 +913,7 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, char* packed_buffer; /* Copy the short data to some temp storage */ packed_buffer = malloc(origin->msg.length); - memcpy(packed_buffer, dep_activate_buff[storage_id] + *position, origin->msg.length); + memcpy(packed_buffer, 
origin->eager_msg + *position, origin->msg.length); *position += origin->msg.length; /* move to the next order */ origin->taskpool = (parsec_taskpool_t*)packed_buffer; /* temporary storage */ } @@ -989,28 +925,27 @@ remote_dep_get_datatypes(parsec_execution_stream_t* es, if(return_defer) { return -2; } - task.task_class = task.taskpool->task_classes_array[origin->msg.task_class_id]; - - for(i = 0; i < task.task_class->nb_flows; - task.data[i].data_in = task.data[i].data_out = NULL, task.data[i].source_repo_entry = NULL, task.data[i].source_repo = NULL, i++); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tRetrieve datatype with mask 0x%x (remote_dep_get_datatypes)", local_mask); - task.locals[k] = origin->msg.locals[k]; - task.task_class = dtd_task->super.task_class; origin->msg.task_class_id = dtd_task->super.task_class->task_class_id; - origin->output[k].data.remote.src_datatype = origin->output[k].data.remote.dst_datatype = PARSEC_DATATYPE_NULL; - task.task_class->iterate_successors(es, (parsec_task_t *)dtd_task, + dtd_task->super.task_class->iterate_successors(es, (parsec_task_t *)dtd_task, local_mask, remote_dep_mpi_retrieve_datatype, origin); } } else { - task.task_class = task.taskpool->task_classes_array[origin->msg.task_class_id]; + parsec_task_t task; + task.taskpool = origin->taskpool; + /* Do not set the task.task_class here, because it might trigger a race condition in DTD */ + task.priority = 0; /* unknown yet */ + + task.task_class = task.taskpool->task_classes_array[origin->msg.task_class_id]; for(i = 0; i < task.task_class->nb_flows; - task.data[i].data_in = task.data[i].data_out = NULL, task.data[i].source_repo_entry = NULL, task.data[i].source_repo = NULL, i++); - + task.data[i].data_in = task.data[i].data_out = NULL, + task.data[i].source_repo_entry = NULL, + task.data[i].source_repo = NULL, i++); + for(i = 0; i < task.task_class->nb_locals; i++) task.locals[i] = origin->msg.locals[i]; @@ -1086,7 +1021,7 @@ 
remote_dep_release_incoming(parsec_execution_stream_t* es, target = task.task_class->out[++pidx]; assert(NULL != target); } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tDATA %p(%s) released from %p[%d] flow idx %d", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tDATA %p(%s) released from %p[%d] flow idx %d", origin->output[i].data.data, target->name, origin, i, target->flow_index); task.data[target->flow_index].source_repo = NULL; task.data[target->flow_index].source_repo_entry = NULL; @@ -1116,7 +1051,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, } else { assert(0); } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tTranslate mask from 0x%lx to 0x%x (remote_dep_release_incoming)", complete_mask, action_mask); (void)task.task_class->release_deps(es, &task, action_mask | PARSEC_ACTION_RELEASE_LOCAL_DEPS | PARSEC_ACTION_RESHAPE_REMOTE_ON_RELEASE, @@ -1163,36 +1098,7 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, return NULL; } -#ifndef PARSEC_REMOTE_DEP_USE_THREADS -static int remote_dep_dequeue_nothread_init(parsec_context_t* context) -{ - parsec_dequeue_construct(&dep_cmd_queue); - parsec_list_construct(&dep_cmd_fifo); - if(-1 == context->comm_ctx) { - MPI_Comm comm; - MPI_Comm_dup(MPI_COMM_WORLD, &comm); - context->comm_ctx = (intptr_t)comm; - assert(-1 != context->comm_ctx /* -1 reserved for non-initialized */); - } - return remote_dep_mpi_setup(context); -} - -static int remote_dep_dequeue_nothread_fini(parsec_context_t* context) -{ - remote_dep_mpi_fini(context); - parsec_list_destruct(&dep_cmd_fifo); - parsec_dequeue_destruct(&dep_cmd_queue); - return 0; -} -#endif - -/** - * Progress the network pushing as many of the pending commands as possible. 
- * First, extract actions from the cmd queue, and rearrange them (priority and - * target) before draining the network and pushing out the highest priority - * actions. - */ -static int +int remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, int cycles) { @@ -1323,40 +1229,20 @@ remote_dep_dequeue_nothread_progress(parsec_execution_stream_t* es, goto check_pending_queues; } -/****************************************************************************** - * ALL MPI SPECIFIC CODE GOES HERE - ******************************************************************************/ -enum { - REMOTE_DEP_ACTIVATE_TAG = 0, - REMOTE_DEP_GET_DATA_TAG, - REMOTE_DEP_MAX_CTRL_TAG -} parsec_remote_dep_tag_t; #ifdef PARSEC_PROF_TRACE -static int MPI_Activate_sk, MPI_Activate_ek; -static int64_t get = 0; -static int MPI_Data_ctl_sk, MPI_Data_ctl_ek; -static int MPI_Data_plds_sk, MPI_Data_plds_ek; -static int MPI_Data_pldr_sk, MPI_Data_pldr_ek; -static int activate_cb_trace_sk, activate_cb_trace_ek; -static int put_cb_trace_sk, put_cb_trace_ek; +int MPI_Activate_sk, MPI_Activate_ek; +int MPI_Data_ctl_sk, MPI_Data_ctl_ek; +int MPI_Data_plds_sk, MPI_Data_plds_ek; +int MPI_Data_pldr_sk, MPI_Data_pldr_ek; +int activate_cb_trace_sk, activate_cb_trace_ek; +int put_cb_trace_sk, put_cb_trace_ek; /** * The structure describe the MPI events saves into the profiling stream. The following * string represent it's description so that an external package can decrypt the * binary format of the stream. */ -typedef struct { - int rank_src; // 0 - int rank_dst; // 4 - uint64_t tid; // 8 - uint32_t tpid; // 16 - uint32_t tcid; // 20 - int32_t msg_size; // 24 - int32_t padding; // 28 -- this field is not necessary, but the structure will be padded - // by the compiler due to the uint64_t field. It is declared here - // just to be consistent with the conversion string. 
-} parsec_profile_remote_dep_mpi_info_t; // 32 bytes static char parsec_profile_remote_dep_mpi_info_to_string[] = "src{int32_t};" "dst{int32_t};" @@ -1395,348 +1281,26 @@ static void remote_dep_mpi_profiling_init(void) &put_cb_trace_sk, &put_cb_trace_ek); parsec_comm_es.es_profile = parsec_profiling_stream_init( 2*1024*1024, "MPI thread"); + parsec_profiling_set_default_thread(parsec_comm_es.es_profile); } static void remote_dep_mpi_profiling_fini(void) { - /* TODO: we need to clean the profiling threads memory */ + /* Nothing to do, the thread_profiling structures will be automatically + * released when the master profiling system is shut down. + */ } - -#define TAKE_TIME_WITH_INFO(PROF, KEY, I, src, dst, rdw, nbdtt, dtt, comm) \ - if( parsec_profile_enabled ) { \ - parsec_profile_remote_dep_mpi_info_t __info; \ - parsec_taskpool_t *__tp = parsec_taskpool_lookup( (rdw).taskpool_id ); \ - const parsec_task_class_t *__tc = __tp->task_classes_array[(rdw).task_class_id ]; \ - __info.rank_src = (src); \ - __info.rank_dst = (dst); \ - __info.tpid = __tp->taskpool_id; \ - __info.tcid = (rdw).task_class_id; \ - __info.tid = __tc->key_functions->key_hash( \ - __tc->make_key(__tp, (rdw).locals), NULL); \ - MPI_Pack_size(nbdtt, dtt, comm, &__info.msg_size); \ - PARSEC_PROFILING_TRACE((PROF), (KEY), (I), \ - PROFILE_OBJECT_ID_NULL, &__info); \ - } - -#define TAKE_TIME(PROF, KEY, I) PARSEC_PROFILING_TRACE((PROF), (KEY), (I), PROFILE_OBJECT_ID_NULL, NULL) - #else -#define TAKE_TIME_WITH_INFO(PROF, KEY, I, src, dst, rdw, count, dtt, comm) do {} while(0) -#define TAKE_TIME(PROF, KEY, I) do {} while(0) + #define remote_dep_mpi_profiling_init() do {} while(0) #define remote_dep_mpi_profiling_fini() do {} while(0) -#endif /* PARSEC_PROF_TRACE */ - -typedef int (*parsec_comm_callback_f)(parsec_execution_stream_t*, - parsec_comm_callback_t*, /**< the associated callback structure */ - MPI_Status* status); /**< the corresponding status */ -struct parsec_comm_callback_s { - 
parsec_comm_callback_f fct; - void* cb_data; - int idx; /* index of the MPI request in the array of request */ -}; - -static parsec_comm_callback_t *array_of_callbacks; -static MPI_Request *array_of_requests; -static int *array_of_indices; -static MPI_Status *array_of_statuses; - -/* TODO: fix heterogeneous restriction by using proper mpi datatypes */ -#define dep_dtt MPI_BYTE -#define dep_count sizeof(remote_dep_wire_activate_t) -#define dep_extent dep_count -#define DEP_SHORT_BUFFER_SIZE (dep_extent+RDEP_MSG_SHORT_LIMIT) -#if ULONG_MAX == UINTPTR_MAX -#define datakey_dtt MPI_LONG -#else -#define datakey_dtt MPI_LONG_LONG -#endif -#define datakey_count 3 -static remote_dep_wire_get_t* dep_get_buff; - -/* Pointers are converted to ptrdiff_t to be used as keys to fetch data in the get - * rdv protocol. Make sure we can carry pointers correctly. - */ -#ifdef PARSEC_HAVE_LIMITS_H -#include -#endif - -/* note: tags are necessary to order communication between pairs. They are used to - * correctly handle data transfers, as each data provider will provide a tag which - * combined with the source ensure message matching consistency. As MPI requires the - * max tag to be positive, initializing it to a negative value allows us to check - * if the layer has been initialized or not. 
- */ -static int MAX_MPI_TAG = -1, mca_tag_ub = -1; -#define MIN_MPI_TAG (REMOTE_DEP_MAX_CTRL_TAG+1) -static volatile int __VAL_NEXT_TAG = MIN_MPI_TAG; -#if INT_MAX == INT32_MAX -#define next_tag_cas(t, o, n) parsec_atomic_cas_int32(t, o, n) -#elif INT_MAX == INT64_MAX -#define next_tag_cas(t, o, n) parsec_atomic_cas_int64(t, o, n) -#else -#error "next_tag_cas written to support sizeof(int) of 4 or 8" -#endif -static inline int next_tag(int k) { - int __tag, __tag_o, __next_tag; -reread: - __tag = __tag_o = __VAL_NEXT_TAG; - if( __tag > (MAX_MPI_TAG-k) ) { - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "rank %d tag rollover: min %d < %d (+%d) < max %d", parsec_debug_rank, - MIN_MPI_TAG, __tag, k, MAX_MPI_TAG); - __tag = MIN_MPI_TAG; - } - __next_tag = __tag+k; - - if( parsec_comm_es.virtual_process->parsec_context->flags & PARSEC_CONTEXT_FLAG_COMM_MT ) { - if(!next_tag_cas(&__VAL_NEXT_TAG, __tag_o, __next_tag)) { - goto reread; - } - } - else { - __VAL_NEXT_TAG = __next_tag; - } - return __tag; -} - -/** - * The following 2 functions take care of all the steps necessary to initialize the - * invariable part of the communication engine such as the const dependencies - * to MPI (max tag and other global info), or local objects. - */ -static int remote_dep_mpi_init_once(parsec_context_t* context) -{ - int mpi_tag_ub_exists, *ub; - - assert(-1 == MAX_MPI_TAG); - PARSEC_OBJ_CONSTRUCT(&dep_activates_fifo, parsec_list_t); - PARSEC_OBJ_CONSTRUCT(&dep_activates_noobj_fifo, parsec_list_t); - PARSEC_OBJ_CONSTRUCT(&dep_put_fifo, parsec_list_t); - - assert(MPI_COMM_NULL == dep_self); - MPI_Comm_dup(MPI_COMM_SELF, &dep_self); - assert(MPI_COMM_NULL == dep_comm); - - /* - * Based on MPI 1.1 the MPI_TAG_UB should only be defined - * on MPI_COMM_WORLD. 
- */ -#if defined(PARSEC_HAVE_MPI_20) - MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &ub, &mpi_tag_ub_exists); -#else - MPI_Attr_get(MPI_COMM_WORLD, MPI_TAG_UB, &ub, &mpi_tag_ub_exists); -#endif /* defined(PARSEC_HAVE_MPI_20) */ - - parsec_mca_param_reg_int_name("mpi", "tag_ub", - "The upper bound of the TAG used by the MPI communication engine. Bounded by the MPI_TAG_UB attribute on the MPI implementation MPI_COMM_WORLD. (-1 for MPI default)", - false, false, -1, &mca_tag_ub); - - if( !mpi_tag_ub_exists ) { - MAX_MPI_TAG = (-1 == mca_tag_ub) ? INT_MAX : mca_tag_ub; - parsec_warning("Your MPI implementation does not define MPI_TAG_UB and thus violates the standard (MPI-2.2, page 29, line 30). The max tag is therefore set using the MCA mpi_tag_ub (current value %d).\n", MAX_MPI_TAG); - } else { - MAX_MPI_TAG = ((-1 == mca_tag_ub) || (mca_tag_ub > *ub)) ? *ub : mca_tag_ub; - } - if( MAX_MPI_TAG < INT_MAX ) { - parsec_debug_verbose(3, parsec_comm_output_stream, - "MPI:\tYour MPI implementation defines the maximal TAG value to %d (0x%08x)," - " which might be too small should you have more than %d pending remote dependencies", - MAX_MPI_TAG, (unsigned int)MAX_MPI_TAG, MAX_MPI_TAG / MAX_DEP_OUT_COUNT); - } - - remote_dep_mpi_profiling_init(); - (void)context; - return 0; -} - -/** - * The communication engine is now completely disabled. All internal resources - * are released, and no future communications are possible. 
- * Anything initialized in init_once must be disposed off here - */ -static int remote_dep_mpi_fini(parsec_context_t* context) -{ - if( -1 == MAX_MPI_TAG ) { - /* Current process hasn't participated in any taskpools */ - /* Release the context communicators if any */ - if( -1 != context->comm_ctx) { - MPI_Comm_free((MPI_Comm*)&context->comm_ctx); - context->comm_ctx = -1; /* We use -1 for the opaque comm_ctx, rather than the MPI specific MPI_COMM_NULL */ - } - return 0; - } - - remote_dep_mpi_cleanup(context); - - /* Remove the static handles */ - MPI_Comm_free(&dep_self); /* dep_self becomes MPI_COMM_NULL */ - - /* Release the context communicators if any */ - if( -1 != context->comm_ctx) { - MPI_Comm_free((MPI_Comm*)&context->comm_ctx); - context->comm_ctx = -1; /* We use -1 for the opaque comm_ctx, rather than the MPI specific MPI_COMM_NULL */ - } - - PARSEC_OBJ_DESTRUCT(&dep_activates_fifo); - PARSEC_OBJ_DESTRUCT(&dep_activates_noobj_fifo); - PARSEC_OBJ_DESTRUCT(&dep_put_fifo); - MAX_MPI_TAG = -1; /* mark the layer as uninitialized */ - remote_dep_mpi_profiling_fini(); - - (void)context; - return 0; -} - -static int remote_dep_mpi_setup(parsec_context_t* context) -{ - parsec_comm_callback_t* cb; - int i, rc; - - if( -1 == MAX_MPI_TAG ) - if( 0 != (rc = remote_dep_mpi_init_once(context)) ) { - parsec_debug_verbose(3, parsec_comm_output_stream, "MPI: Failed to correctly retrieve the max TAG." - " PaRSEC cannot continue using MPI\n"); - return rc; - } - - /* Did anything changed that would require a build of the management structures? 
*/ - assert(-1 != context->comm_ctx); - if(dep_comm == (MPI_Comm)context->comm_ctx) { - return 0; - } - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "rank %d ENABLE MPI communication engine", - parsec_debug_rank); - if(MPI_COMM_NULL != dep_comm) { - parsec_debug_verbose(3, parsec_comm_output_stream, "MPI: Rearming the dep_comm and dep_self."); - /* Cleanup prior setup */ - remote_dep_mpi_cleanup(context); - } - assert(-1 != context->comm_ctx); - dep_comm = (MPI_Comm) context->comm_ctx; - - MPI_Comm_size(dep_comm, &(context->nb_nodes)); - MPI_Comm_rank(dep_comm, &(context->my_rank)); - - parsec_mpi_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; - parsec_mpi_same_pos_items = (dep_cmd_item_t**)calloc(parsec_mpi_same_pos_items_size, - sizeof(dep_cmd_item_t*)); - /* Extend the number of pending activations if we have a large number of peers */ - if( context->nb_nodes > (10*parsec_comm_activations_max) ) - parsec_comm_activations_max = context->nb_nodes / 10; - if( context->nb_nodes > (10*parsec_comm_data_get_max) ) - parsec_comm_data_get_max = context->nb_nodes / 10; - DEP_NB_REQ = (parsec_comm_activations_max + parsec_comm_data_get_max + - parsec_comm_gets_max + parsec_comm_puts_max); - - array_of_callbacks = (parsec_comm_callback_t*)calloc(DEP_NB_REQ, sizeof(parsec_comm_callback_t)); - array_of_requests = (MPI_Request*)calloc(DEP_NB_REQ, sizeof(MPI_Request)); - array_of_indices = (int*)calloc(DEP_NB_REQ, sizeof(int)); - array_of_statuses = (MPI_Status*)calloc(DEP_NB_REQ, sizeof(MPI_Status)); - for(i = 0; i < DEP_NB_REQ; i++) - array_of_requests[i] = MPI_REQUEST_NULL; - - /* Create all the persistent receives (activation and GET orders) and start them */ - dep_activate_buff = (char**)calloc(parsec_comm_activations_max, sizeof(char*)); - dep_activate_buff[0] = (char*)calloc(parsec_comm_activations_max, DEP_SHORT_BUFFER_SIZE*sizeof(char)); - for(i = 0; i < parsec_comm_activations_max; i++) { - dep_activate_buff[i] = dep_activate_buff[0] + i * 
DEP_SHORT_BUFFER_SIZE*sizeof(char); - MPI_Recv_init(dep_activate_buff[i], DEP_SHORT_BUFFER_SIZE, MPI_PACKED, - MPI_ANY_SOURCE, REMOTE_DEP_ACTIVATE_TAG, dep_comm, - &array_of_requests[parsec_comm_last_active_req]); - cb = &array_of_callbacks[parsec_comm_last_active_req]; - cb->fct = remote_dep_mpi_save_activate_cb; - cb->cb_data = (void*)(uintptr_t)parsec_comm_last_active_req; - cb->idx = i; - MPI_Start(&array_of_requests[parsec_comm_last_active_req]); - parsec_comm_last_active_req++; - } - - dep_get_buff = (remote_dep_wire_get_t*)calloc(parsec_comm_data_get_max, sizeof(remote_dep_wire_get_t)); - for(i = 0; i < parsec_comm_data_get_max; i++) { - MPI_Recv_init(&dep_get_buff[i], datakey_count, datakey_dtt, - MPI_ANY_SOURCE, REMOTE_DEP_GET_DATA_TAG, dep_comm, - &array_of_requests[parsec_comm_last_active_req]); - cb = &array_of_callbacks[parsec_comm_last_active_req]; - cb->fct = remote_dep_mpi_save_put_cb; - cb->cb_data = (void*)(uintptr_t)parsec_comm_last_active_req; - cb->idx = i; - MPI_Start(&array_of_requests[parsec_comm_last_active_req]); - parsec_comm_last_active_req++; - } - - return 0; -} - -#include "parsec/utils/mca_param.h" - -static void remote_dep_mpi_params(parsec_context_t* context) { - (void)context; -#if RDEP_MSG_SHORT_LIMIT != 0 - parsec_mca_param_reg_sizet_name("runtime", "comm_short_limit", "Controls the maximum size of a short message. Short messages contain both the control message notifying the completion of a task and the associated data that fit completely in that buffer length. 
The maximum size of a short message should be lower than the network MTU.", - false, false, parsec_param_short_limit, &parsec_param_short_limit); - if(parsec_param_short_limit > RDEP_MSG_SHORT_LIMIT) { - parsec_warning("User requested a short message limit of %d which is greater than compiled in limit %d; value reset to compiled limit %d", parsec_param_short_limit, RDEP_MSG_SHORT_LIMIT, RDEP_MSG_SHORT_LIMIT); - parsec_param_short_limit = RDEP_MSG_SHORT_LIMIT; - } -#endif - parsec_mca_param_reg_int_name("runtime", "comm_aggregate", "Aggregate multiple dependencies in the same short message (1=true,0=false).", - false, false, parsec_param_enable_aggregate, &parsec_param_enable_aggregate); -} - -void -remote_dep_mpi_initialize_execution_stream(parsec_context_t *context) -{ - memcpy(&parsec_comm_es, context->virtual_processes[0]->execution_streams[0], sizeof(parsec_execution_stream_t)); -} - -/** - * This function should be called to release all internal structures - * needed to handle the internals of the communication engine. Upon - * return however, if the engine is to be reused it has to be reinitialized. - * In general this function is called either upon finalization of the PaRSEC - * engine or by the communication thread early during the startup process, in - * order to prepare for message exchange. - * BEWARE: this function release the communicator, it has to be called collectively. 
- */ -static int remote_dep_mpi_cleanup(parsec_context_t* context) -{ - int i, flag; - MPI_Status status; - - /* Cancel and release all persistent requests */ - for(i = 0; i < parsec_comm_activations_max + parsec_comm_data_get_max; i++) { - MPI_Cancel(&array_of_requests[i]); - MPI_Test(&array_of_requests[i], &flag, &status); - MPI_Request_free(&array_of_requests[i]); - assert( MPI_REQUEST_NULL == array_of_requests[i] ); - } - parsec_comm_last_active_req -= (parsec_comm_activations_max + parsec_comm_data_get_max); - assert(0 == parsec_comm_last_active_req); - - free(array_of_callbacks); array_of_callbacks = NULL; - free(array_of_requests); array_of_requests = NULL; - free(array_of_indices); array_of_indices = NULL; - free(array_of_statuses); array_of_statuses = NULL; - - free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; - parsec_mpi_same_pos_items_size = 0; - - free(dep_get_buff); dep_get_buff = NULL; - free(dep_activate_buff[0]); - free(dep_activate_buff); dep_activate_buff = NULL; - /* Force a reconstruction of the internal dep_comm but without free it, in order - * to avoid releasing the context->comm_ctx. 
- */ - dep_comm = MPI_COMM_NULL; +#endif /* PARSEC_PROF_TRACE */ - (void)context; - return 0; -} static int remote_dep_mpi_on(parsec_context_t* context) { - remote_dep_mpi_setup(context); + // TODO: make sure this is correct with revamp #if defined(PARSEC_PROF_TRACE) /* This is less than ideal, but remote_dep_mpi_setup * holds a mpi_comm_dup() which is often implemented @@ -1766,7 +1330,7 @@ static int remote_dep_mpi_pack_dep(int peer, int length, int* position) { - parsec_remote_deps_t *deps = (parsec_remote_deps_t*)item->cmd.activate.task.deps; + parsec_remote_deps_t *deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; remote_dep_wire_activate_t* msg = &deps->msg; int k, dsize, saved_position = *position; uint32_t peer_bank, peer_mask, expected = 0; @@ -1778,9 +1342,9 @@ static int remote_dep_mpi_pack_dep(int peer, peer_bank = peer / (sizeof(uint32_t) * 8); peer_mask = 1U << (peer % (sizeof(uint32_t) * 8)); - MPI_Pack_size(dep_count, dep_dtt, dep_comm, &dsize); + parsec_ce.pack_size(&parsec_ce, dep_count, dep_dtt, &dsize); if( (length - (*position)) < dsize ) { /* no room. bail out */ - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "Can't pack at %d/%d. Bail out!", *position, length); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Can't pack at %d/%d. 
Bail out!", *position, length); return 1; } /* Don't pack yet, we need to update the length field before packing */ @@ -1799,10 +1363,10 @@ static int remote_dep_mpi_pack_dep(int peer, parsec_set_CTL_dep(deps->output[k].data); #endif if( parsec_is_CTL_dep(deps->output[k].data) ) { - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, " CTL\t%s\tparam %d\tdemoted to be a control", tmp, k); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, " CTL\t%s\tparam %d\tdemoted to be a control", tmp, k); continue; } - assert(deps->output[k].data.remote.src_count > 0); + #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) if(PARSEC_DATATYPE_NULL == deps->output[k].data.remote.src_datatype) { parsec_fatal("Output %d of %s has not defined a datatype: check that the data collection does" @@ -1811,49 +1375,25 @@ static int remote_dep_mpi_pack_dep(int peer, } #endif -#ifdef PARSEC_RESHAPE_BEFORE_SEND_TO_REMOTE - /* If we want to reshape before sending, we don't do short messages. */ - if( (deps->output[k].data.data_future == NULL) && (parsec_param_short_limit) ) { -#else - if( parsec_param_short_limit ) { -#endif - /* Embed data (up to short size) with the activate msg only if not reshaping needs to be performed */ - MPI_Pack_size(deps->output[k].data.remote.src_count, deps->output[k].data.remote.src_datatype, - dep_comm, &dsize); - if((length - (*position)) >= dsize) { - MPI_Pack((char*)PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data) + deps->output[k].data.remote.src_displ, - deps->output[k].data.remote.src_count, deps->output[k].data.remote.src_datatype, - packed_buffer, length, position, dep_comm); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, " EGR\t%s\tparam %d\tshort piggyback in the activate msg (%d/%d)", - tmp, k, *position, length); - msg->length += dsize; - continue; /* go to the next */ - } else if( 0 != saved_position ) { - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "DATA\t%s\tparam %d\texceed buffer length. 
Start again from here next iteration", - tmp, k); - *position = saved_position; - return 1; - } - /* the data doesn't fit in the buffer. */ - } + // TODO JS: add back short message packing + expected++; item->cmd.activate.task.output_mask |= (1U<pending_ack); } if(expected) (void)parsec_atomic_fetch_add_int32(&deps->pending_ack, expected); /* Keep track of the inflight data */ - /* We can only have up to k data sends related to this remote_dep (include the order itself) */ - item->cmd.activate.task.tag = next_tag(k); - msg->tag = item->cmd.activate.task.tag; + #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tTO\t%d\tActivate\t% -8s\n" - " \t\t\twith datakey %lx\tmask %lx\t(tag=%d) short mask %lu length %d", - peer, tmp, msg->deps, msg->output_mask, msg->tag, + parsec_debug_verbose(6, parsec_debug_output, "MPI:\tTO\t%d\tActivate\t% -8s\n" + " \t\t\twith datakey %lx\tmask %lx\t(tag=%d) eager mask %lu length %d", + peer, tmp, msg->deps, msg->output_mask, -1, msg->output_mask ^ item->cmd.activate.task.output_mask, msg->length); #endif /* And now pack the updated message (msg->length and msg->output_mask) itself. 
*/ - MPI_Pack(msg, dep_count, dep_dtt, packed_buffer, length, &saved_position, dep_comm); + parsec_ce.pack(&parsec_ce, msg, dep_count, dep_dtt, packed_buffer, length, &saved_position); + msg->length = dsize; return 0; } @@ -1870,18 +1410,16 @@ static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, (char*)PARSEC_DATA_COPY_GET_PTR(cmd->memcpy.destination) + cmd->memcpy.layout.dst_displ, cmd->memcpy.layout.dst_datatype, cmd->memcpy.layout.dst_count); - int rc = MPI_Sendrecv((char*)PARSEC_DATA_COPY_GET_PTR(cmd->memcpy.source ) + cmd->memcpy.layout.src_displ, - cmd->memcpy.layout.src_count, cmd->memcpy.layout.src_datatype, 0, 0, - (char*)PARSEC_DATA_COPY_GET_PTR(cmd->memcpy.destination) + cmd->memcpy.layout.dst_displ, - cmd->memcpy.layout.dst_count, cmd->memcpy.layout.dst_datatype, 0, 0, - dep_self, MPI_STATUS_IGNORE); + int rc = parsec_ce.reshape(&parsec_ce, es, + cmd->memcpy.destination, cmd->memcpy.layout.dst_displ, cmd->memcpy.layout.dst_datatype, cmd->memcpy.layout.dst_count, + cmd->memcpy.source, cmd->memcpy.layout.src_displ, cmd->memcpy.layout.src_datatype, cmd->memcpy.layout.src_count); + PARSEC_DATA_COPY_RELEASE(cmd->memcpy.source); remote_dep_dec_flying_messages(item->cmd.memcpy.taskpool); (void)es; - return (MPI_SUCCESS == rc ? 0 : -1); + return rc; } - /** * * Routine to fulfill a reshape promise by the communication thread. 
@@ -1938,18 +1476,23 @@ static int local_dep_nothread_reshape(parsec_execution_stream_t* es, static int remote_dep_nothread_send(parsec_execution_stream_t* es, dep_cmd_item_t **head_item) { + (void)es; parsec_remote_deps_t *deps; dep_cmd_item_t *item = *head_item; parsec_list_item_t* ring = NULL; char packed_buffer[DEP_SHORT_BUFFER_SIZE]; int peer, position = 0; +#ifdef PARSEC_PROF_TRACE + static int save_act = 0; + int event_id = parsec_atomic_fetch_inc_int32(&save_act); +#endif /* PARSEC_PROF_TRACE */ peer = item->cmd.activate.peer; /* this doesn't change */ - deps = (parsec_remote_deps_t*)item->cmd.activate.task.deps; + deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; pack_more: assert(peer == item->cmd.activate.peer); - deps = (parsec_remote_deps_t*)item->cmd.activate.task.deps; + deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; parsec_list_item_singleton((parsec_list_item_t*)item); if( 0 == remote_dep_mpi_pack_dep(peer, item, packed_buffer, @@ -1970,114 +1513,107 @@ static int remote_dep_nothread_send(parsec_execution_stream_t* es, TAKE_TIME_WITH_INFO(es->es_profile, MPI_Activate_sk, 0, es->virtual_process->parsec_context->my_rank, - peer, deps->msg, position, MPI_PACKED, dep_comm); - MPI_Send((void*)packed_buffer, position, MPI_PACKED, peer, REMOTE_DEP_ACTIVATE_TAG, dep_comm); - TAKE_TIME(es->es_profile, MPI_Activate_ek, 0); + peer, deps->msg, position, MPI_PACKED, MPI_COMM_WORLD); + parsec_ce.send_am(&parsec_ce, REMOTE_DEP_ACTIVATE_TAG, peer, packed_buffer, position); + TAKE_TIME(es->es_profile, MPI_Activate_ek, event_id); DEBUG_MARK_CTL_MSG_ACTIVATE_SENT(peer, (void*)&deps->msg, &deps->msg); do { item = (dep_cmd_item_t*)ring; ring = parsec_list_item_ring_chop(ring); - deps = (parsec_remote_deps_t*)item->cmd.activate.task.deps; + deps = (parsec_remote_deps_t*)item->cmd.activate.task.source_deps; + free(item); /* only large messages are left */ + remote_dep_complete_and_cleanup(&deps, 1); } while( NULL != ring ); - 
(void)es; return 0; } +/** + * Progress the network pushing as many of the pending commands as possible. + * First, extract actions from the cmd queue, and rearrange them (priority and + * target) before draining the network and pushing out the highest priority + * actions. + */ static int remote_dep_mpi_progress(parsec_execution_stream_t* es) { - MPI_Status *status; - int ret = 0, idx, outcount, pos; - parsec_comm_callback_t* cb; + int ret = 0; if( !PARSEC_THREAD_IS_MASTER(es) ) return 0; - do { - MPI_Testsome(parsec_comm_last_active_req, array_of_requests, - &outcount, array_of_indices, array_of_statuses); - if(0 == outcount) goto feed_more_work; /* can we push some more work? */ - - /* Trigger the callbacks */ - for( idx = 0; idx < outcount; idx++ ) { - - cb = &array_of_callbacks[array_of_indices[idx]]; - status = &(array_of_statuses[idx]); - - cb->fct(es, cb, status); - ret++; - } - - /* Compact the pending requests in order to minimize the testsome waiting time. - * Parsing the array_of_indices in the reverse order insure a smooth and fast - * compacting. 
- */ - for( idx = outcount-1; idx >= 0; idx-- ) { - pos = array_of_indices[idx]; - if(MPI_REQUEST_NULL != array_of_requests[pos]) - continue; /* The callback replaced the completed request, keep going */ - /* Get the last active callback to replace the empty one */ - parsec_comm_last_active_req--; - if( parsec_comm_last_active_req > pos ) { - array_of_requests[pos] = array_of_requests[parsec_comm_last_active_req]; - array_of_callbacks[pos] = array_of_callbacks[parsec_comm_last_active_req]; - } - array_of_requests[parsec_comm_last_active_req] = MPI_REQUEST_NULL; - } + ret = parsec_ce.progress(&parsec_ce); - feed_more_work: - if((parsec_comm_gets < parsec_comm_gets_max) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { + if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { parsec_remote_deps_t* deps = (parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); - remote_dep_mpi_get_start(es, deps); - ret++; - } - if((parsec_comm_puts < parsec_comm_puts_max) && !parsec_list_nolock_is_empty(&dep_put_fifo)) { + remote_dep_mpi_get_start(es, deps); + ret++; + } + if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_put_fifo)) { dep_cmd_item_t* item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); - remote_dep_mpi_put_start(es, item); - ret++; - } - } while( 0 != outcount); + remote_dep_mpi_put_start(es, item); + ret++; + } + return ret; } static int -remote_dep_mpi_save_put_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, - MPI_Status* status) +remote_dep_mpi_save_put_cb(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) { + (void) ce; (void) tag; (void) cb_data; (void) msg_size; remote_dep_wire_get_t* task; parsec_remote_deps_t *deps; dep_cmd_item_t* item; #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif + parsec_execution_stream_t* es = &parsec_comm_es; item = (dep_cmd_item_t*) 
malloc(sizeof(dep_cmd_item_t)); PARSEC_OBJ_CONSTRUCT(&item->super, parsec_list_item_t); item->action = DEP_GET_DATA; - item->cmd.activate.peer = status->MPI_SOURCE; + item->cmd.activate.peer = src; task = &(item->cmd.activate.task); - memcpy(task, &dep_get_buff[cb->idx], sizeof(remote_dep_wire_get_t)); - deps = (parsec_remote_deps_t*) (uintptr_t) task->deps; + /* copy the static part of the message, the part after this contains the memory_handle + * of the other side. + */ + memcpy(task, msg, sizeof(remote_dep_wire_get_t)); + + /* we are expecting exactly one wire_get_t + remote memory handle */ + assert(msg_size == sizeof(remote_dep_wire_get_t) + ce->get_mem_handle_size()); + + item->cmd.activate.remote_memory_handle = malloc(ce->get_mem_handle_size()); + memcpy( item->cmd.activate.remote_memory_handle, + ((char*)msg) + sizeof(remote_dep_wire_get_t), + ce->get_mem_handle_size() ); + + deps = (parsec_remote_deps_t*)(remote_dep_datakey_t)task->source_deps; /* get our deps back */ assert(0 != deps->pending_ack); assert(0 != deps->outgoing_mask); item->priority = deps->max_priority; + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put cb_received for %s from %d tag %u which 0x%x (deps %p)", + remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, + -1, task->output_mask, (void*)deps); + /* Get the highest priority PUT operation */ parsec_list_nolock_push_sorted(&dep_put_fifo, (parsec_list_item_t*)item, dep_cmd_prio); - if( parsec_comm_puts < parsec_comm_puts_max ) { + if( parsec_ce.can_serve(&parsec_ce) ) { item = (dep_cmd_item_t*)parsec_list_nolock_pop_front(&dep_put_fifo); remote_dep_mpi_put_start(es, item); } else { - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI: Put DELAYED for %s from %d tag %u which 0x%x (deps %p)", + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI: Put DELAYED for %s from %d tag %u which 0x%x (deps %p)", remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), item->cmd.activate.peer, - 
task->tag, task->output_mask, (void*)deps); + -1, task->output_mask, (void*)deps); } - /* Let's re-enable the pending request in the same position */ - MPI_Start(&array_of_requests[(int)(ptrdiff_t)cb->cb_data]); - return 0; + return 1; } static void @@ -2086,9 +1622,8 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, { remote_dep_wire_get_t* task = &(item->cmd.activate.task); #if !defined(PARSEC_PROF_DRY_DEP) - parsec_remote_deps_t* deps = (parsec_remote_deps_t*) (uintptr_t) task->deps; - int k, nbdtt, tag = task->tag; - parsec_comm_callback_t* cb; + parsec_remote_deps_t* deps = (parsec_remote_deps_t*) (uintptr_t) task->source_deps; + int k, nbdtt; void* dataptr; MPI_Datatype dtt; #endif /* !defined(PARSEC_PROF_DRY_DEP) */ @@ -2102,7 +1637,7 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, #if !defined(PARSEC_PROF_DRY_DEP) assert(task->output_mask); - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->deps); + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tPUT mask=%lx deps 0x%lx", task->output_mask, task->source_deps); #ifdef PARSEC_RESHAPE_BEFORE_SEND_TO_REMOTE int all_completed = 1; @@ -2152,67 +1687,105 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, assert(k < MAX_PARAM_COUNT); if(!((1U<output_mask)) continue; - if(parsec_comm_puts == parsec_comm_puts_max) { - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\treach PUT limit for deps 0x%lx. 
Reschedule.", deps); - parsec_list_nolock_push_front(&dep_put_fifo, (parsec_list_item_t*)item); - return; - } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<output_mask, + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\t[idx %d mask(0x%x / 0x%x)] %p, %p", k, (1U<output_mask, deps->output[k].data.data, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data)); dataptr = PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data); dtt = deps->output[k].data.remote.src_datatype; nbdtt = deps->output[k].data.remote.src_count; + (void) nbdtt; + + task->output_mask ^= (1U<cmd.activate.remote_memory_handle; + #if defined(PARSEC_DEBUG_NOISIER) MPI_Type_get_name(dtt, type_name, &len); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type %s (%p)\t(tag=%d displ = %ld)", - item->cmd.activate.peer, k, task->deps, dataptr, type_name, dtt, tag+k, deps->output[k].data.remote.src_displ); + PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\t%d\tPut START\tunknown \tk=%d\twith deps 0x%lx at %p type %s (%p)\t(src_mem_handle = %p, dst_mem_handle = %p)", + item->cmd.activate.peer, k, task->source_deps, dataptr, type_name, dtt, source_memory_handle, remote_memory_handle); #endif + remote_dep_cb_data_t *cb_data = (remote_dep_cb_data_t *) parsec_thread_mempool_allocate + (parsec_remote_dep_cb_data_mempool->thread_mempools); + cb_data->deps = deps; + cb_data->k = k; + TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_plds_sk, k, es->virtual_process->parsec_context->my_rank, - item->cmd.activate.peer, deps->msg, nbdtt, dtt, dep_comm); - task->output_mask ^= (1U<output[k].data.remote.src_displ, nbdtt, dtt, - item->cmd.activate.peer, tag + k, dep_comm, - &array_of_requests[parsec_comm_last_active_req]); - cb = &array_of_callbacks[parsec_comm_last_active_req]; - cb->fct = remote_dep_mpi_put_end_cb; - cb->cb_data = (void*)deps; - cb->idx = k; - 
parsec_comm_last_active_req++; + item->cmd.activate.peer, deps->msg, nbdtt, dtt, MPI_COMM_WORLD); + + /* the remote side should send us 8 bytes as the callback data to be passed back to them */ + parsec_ce.put(&parsec_ce, source_memory_handle, 0, + remote_memory_handle, 0, + 0, item->cmd.activate.peer, + remote_dep_mpi_put_end_cb, cb_data, + (parsec_ce_tag_t)task->callback_fn, &task->remote_callback_data, sizeof(uintptr_t)); + parsec_comm_puts++; - assert(parsec_comm_last_active_req <= DEP_NB_REQ); - DEBUG_MARK_DTA_MSG_START_SEND(item->cmd.activate.peer, dataptr, tag+k); } #endif /* !defined(PARSEC_PROF_DRY_DEP) */ if(0 == task->output_mask) { - PARSEC_DEBUG_VERBOSE(100, parsec_comm_output_stream, "PUT_START output_maks completed for item %p, freeing", item); + if(NULL != item->cmd.activate.remote_memory_handle) { + free(item->cmd.activate.remote_memory_handle); + item->cmd.activate.remote_memory_handle = NULL; + } free(item); } } static int -remote_dep_mpi_put_end_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, - MPI_Status* status) +remote_dep_mpi_put_end_cb(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data) { - parsec_remote_deps_t* deps = (parsec_remote_deps_t*)cb->cb_data; + (void) ldispl; (void) rdispl; (void) size; (void) remote; (void) rreg; + /* Retreive deps from callback_data */ + parsec_remote_deps_t* deps = ((remote_dep_cb_data_t *)cb_data)->deps; + + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tTO\tna\tPut END \tunknown \tk=%d\twith deps %p\tparams bla\t(src_mem_hanlde = %p, dst_mem_handle=%p", + ((remote_dep_cb_data_t *)cb_data)->k, deps, lreg, rreg); + +#if defined(PARSEC_PROF_TRACE) + parsec_profiling_ts_trace(MPI_Data_plds_ek, ((remote_dep_cb_data_t *)cb_data)->k, PROFILE_OBJECT_ID_NULL, NULL); +#endif - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\tna\tPut END \tunknown 
\tk=%d\twith deps %p\tparams %lx\t(tag=%d) data ptr %p", - cb->idx, deps, (long)cb->idx, status->MPI_TAG, - deps->output[cb->idx].data.data); (void)status; - DEBUG_MARK_DTA_MSG_END_SEND(status->MPI_TAG); - TAKE_TIME(es->es_profile, MPI_Data_plds_ek, cb->idx); remote_dep_complete_and_cleanup(&deps, 1); + + ce->mem_unregister(&lreg); + parsec_thread_mempool_free(parsec_remote_dep_cb_data_mempool->thread_mempools, cb_data); + parsec_comm_puts--; - (void)es; - return 0; + return 1; } + /** * An activation message has been received, and the remote_dep_wire_activate_t * part has already been extracted into the deps->msg. This function handles the - * rest of the receiver logic, extract the possible short and control data from + * rest of the receiver logic, extract the possible eager and control data from * the buffer, post all the control messages to initiate RGET, and all other local * cleanups. */ @@ -2222,15 +1795,17 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, int length, int* position) { + (void) length; (void) position; + (void) packed_buffer; remote_dep_datakey_t complete_mask = 0; - int k, dsize, tag = (int)deps->msg.tag; (void)tag; + int k; #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN); #endif #if defined(PARSEC_DEBUG) || defined(PARSEC_DEBUG_NOISIER) - parsec_debug_verbose(6, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate\t% -8s\n" + parsec_debug_verbose(6, parsec_debug_output, "MPI:\tFROM\t%d\tActivate\t% -8s\n" "\twith datakey %lx\tparams %lx length %d (pack buf %d/%d) prio %d", deps->from, tmp, deps->msg.deps, deps->incoming_mask, deps->msg.length, *position, length, deps->max_priority); @@ -2239,35 +1814,12 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, if(!(deps->incoming_mask & (1U<output[k].data) ){ - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tHERE\t%d\tGet NONE\t% 
-8s\tk=%d\twith datakey %lx at type CONTROL", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tHERE\t%d\tGet NONE\t% -8s\tk=%d\twith datakey %lx at type CONTROL", deps->from, tmp, k, deps->msg.deps); /* deps->output[k].data.data = NULL; This is unnecessary*/ complete_mask |= (1U< *position) ) { - /* Check if the data is short-embedded in the activate */ - MPI_Pack_size(deps->output[k].data.remote.src_count, deps->output[k].data.remote.src_datatype, - dep_comm, &dsize); - if((length - (*position)) >= dsize) { - assert(NULL == deps->output[k].data.data); /* we do not support in-place tiles now, make sure it doesn't happen yet */ - if(NULL == deps->output[k].data.data) { - deps->output[k].data.data = remote_dep_copy_allocate(&deps->output[k].data.remote); - } -#ifndef PARSEC_PROF_DRY_DEP - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, " EGR\t%s\tparam %d\tshort from the activate msg (%d/%d)", - tmp, k, dsize, length - *position); - MPI_Unpack(packed_buffer, length, position, - (char*)PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data) + deps->output[k].data.remote.dst_displ, - deps->output[k].data.remote.dst_count, deps->output[k].data.remote.dst_datatype, dep_comm); -#endif - complete_mask |= (1U<from, tmp, k, deps->msg.deps, tag+k); } assert(length == *position); @@ -2276,8 +1828,8 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, #if defined(PARSEC_DEBUG_NOISIER) for(int k = 0; complete_mask>>k; k++) if((1U<from, tmp, k, deps->msg.deps, deps->output[k].data.data, tag+k ); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tHERE\t%d\tGet PREEND\t% -8s\tk=%d\twith datakey %lx at %p ALREADY SATISFIED\t(tag=%d)", + deps->from, tmp, k, deps->msg.deps, deps->output[k].data.data, k ); #endif /* If this is the only call then force the remote deps propagation */ deps = remote_dep_release_incoming(es, deps, complete_mask); @@ -2291,45 +1843,48 @@ static void remote_dep_mpi_recv_activate(parsec_execution_stream_t* es, } /* Check if we 
have any pending GET orders */ - if((parsec_comm_gets < parsec_comm_gets_max) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { + if(parsec_ce.can_serve(&parsec_ce) && !parsec_list_nolock_is_empty(&dep_activates_fifo)) { deps = (parsec_remote_deps_t*)parsec_list_nolock_pop_front(&dep_activates_fifo); remote_dep_mpi_get_start(es, deps); } } static int -remote_dep_mpi_save_activate_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, - MPI_Status* status) +remote_dep_mpi_save_activate_cb(parsec_comm_engine_t *ce, parsec_ce_tag_t tag, + void *msg, size_t msg_size, int src, + void *cb_data) { + (void) tag; (void) cb_data; + parsec_execution_stream_t* es = &parsec_comm_es; + PARSEC_PINS(es, ACTIVATE_CB_BEGIN, NULL); #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif - int position = 0, length, rc; + int position = 0, length = msg_size, rc; parsec_remote_deps_t* deps = NULL; - MPI_Get_count(status, MPI_PACKED, &length); while(position < length) { deps = remote_deps_allocate(&parsec_remote_dep_context.freelist); - MPI_Unpack(dep_activate_buff[cb->idx], length, &position, - &deps->msg, dep_count, dep_dtt, dep_comm); - deps->from = status->MPI_SOURCE; + + ce->unpack(ce, msg, length, &position, &deps->msg, dep_count, dep_dtt); + deps->from = src; + deps->eager_msg = msg; /* Retrieve the data arenas and update the msg.incoming_mask to reflect * the data we should be receiving from the predecessor. */ - rc = remote_dep_get_datatypes(es, deps, cb->idx, &position); + rc = remote_dep_get_datatypes(es, deps, 0, &position); if( -1 == rc ) { /* the corresponding tp doesn't exist, yet. 
Put it in unexpected */ char* packed_buffer; - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tFROM\t%d\tActivate NoTPool\t% -8s\tk=%d\twith datakey %lx\tparams %lx", deps->from, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), - cb->idx, deps->msg.deps, deps->msg.output_mask); - /* Copy the short data to some temp storage */ + 0, deps->msg.deps, deps->msg.output_mask); + /* Copy the eager data to some temp storage */ packed_buffer = malloc(deps->msg.length); - memcpy(packed_buffer, dep_activate_buff[cb->idx] + position, deps->msg.length); + memcpy(packed_buffer, msg + position, deps->msg.length); position += deps->msg.length; /* move to the next order */ deps->taskpool = (parsec_taskpool_t*)packed_buffer; /* temporary storage */ parsec_list_nolock_push_back(&dep_activates_noobj_fifo, (parsec_list_item_t*)deps); @@ -2342,23 +1897,22 @@ remote_dep_mpi_save_activate_cb(parsec_execution_stream_t* es, } } - PARSEC_DEBUG_VERBOSE(20, parsec_comm_output_stream, "MPI:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", - status->MPI_SOURCE, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), - cb->idx, deps->msg.deps, deps->msg.output_mask); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "MPI:\tFROM\t%d\tActivate\t% -8s\tk=%d\twith datakey %lx\tparams %lx", + src, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), + 0, deps->msg.deps, deps->msg.output_mask); /* Import the activation message and prepare for the reception */ - remote_dep_mpi_recv_activate(es, deps, dep_activate_buff[cb->idx], + remote_dep_mpi_recv_activate(es, deps, msg, position + deps->msg.length, &position); assert( parsec_param_enable_aggregate || (position == length)); } assert(position == length); - /* Let's re-enable the pending request in the same position */ - MPI_Start(&array_of_requests[(int)(ptrdiff_t)cb->cb_data]); 
PARSEC_PINS(es, ACTIVATE_CB_END, NULL); - return 0; + return 1; } -static void remote_dep_mpi_new_taskpool( parsec_execution_stream_t* es, - dep_cmd_item_t *dep_cmd_item ) +void +remote_dep_mpi_new_taskpool(parsec_execution_stream_t* es, + dep_cmd_item_t *dep_cmd_item) { parsec_list_item_t *item; parsec_taskpool_t* obj = dep_cmd_item->cmd.new_taskpool.tp; @@ -2390,7 +1944,6 @@ static void remote_dep_mpi_new_taskpool( parsec_execution_stream_t* es, continue; } - remote_dep_mpi_recv_activate(es, deps, buffer, deps->msg.length, &position); free(buffer); (void)rc; @@ -2407,8 +1960,8 @@ static void remote_dep_mpi_new_taskpool( parsec_execution_stream_t* es, * the remote task. */ static void -remote_dep_mpi_release_delayed_deps( parsec_execution_stream_t* es, - dep_cmd_item_t *item ) +remote_dep_mpi_release_delayed_deps(parsec_execution_stream_t* es, + dep_cmd_item_t *item) { PARSEC_PINS(es, ACTIVATE_CB_BEGIN, NULL); parsec_remote_deps_t *deps = item->cmd.release.deps; @@ -2416,7 +1969,7 @@ remote_dep_mpi_release_delayed_deps( parsec_execution_stream_t* es, char* buffer = (char*)deps->taskpool; /* get back the buffer from the "temporary" storage */ deps->taskpool = NULL; - rc = remote_dep_get_datatypes(es, deps, PARSEC_DTD_SKIP_SAVING, &position); + rc = remote_dep_get_datatypes(es, deps, 1, &position); assert(rc != -2); (void)rc; @@ -2439,71 +1992,103 @@ static void remote_dep_mpi_get_start(parsec_execution_stream_t* es, int len; remote_dep_cmd_to_string(task, tmp, MAX_TASK_STRLEN); #endif - +#ifdef PARSEC_PROF_TRACE + int32_t save_get = 0; + int32_t event_id = parsec_atomic_fetch_inc_int32(&save_get); +#endif /* PARSEC_PROF_TRACE */ for(k = count = 0; deps->incoming_mask >> k; k++) if( ((1U<incoming_mask) ) count++; - if( (parsec_comm_gets + count) > parsec_comm_gets_max ) { - assert(deps->msg.output_mask != 0); - assert(deps->incoming_mask != 0); - parsec_list_nolock_push_front(&dep_activates_fifo, (parsec_list_item_t*)deps); - return; - } + (void)es; 
DEBUG_MARK_CTL_MSG_ACTIVATE_RECV(from, (void*)task, task); - msg.output_mask = deps->incoming_mask; /* Only get what I need */ - msg.deps = task->deps; - msg.tag = task->tag; + msg.source_deps = task->deps; /* the deps copied from activate message from source */ + msg.callback_fn = (uintptr_t)remote_dep_mpi_get_end_cb; /* We let the source know to call this + * function when the PUT is over, in a true + * one sided case the (integer) value of this + * function pointer will be registered as the + * TAG to receive the same notification. */ for(k = 0; deps->incoming_mask >> k; k++) { if( !((1U<incoming_mask) ) continue; + msg.output_mask = 0; /* Only get what I need */ + msg.output_mask |= (1U<thread_mempools); + callback_data->deps = deps; + callback_data->k = k; /* prepare the local receiving data */ assert(NULL == deps->output[k].data.data); /* we do not support in-place tiles now, make sure it doesn't happen yet */ if(NULL == deps->output[k].data.data) { deps->output[k].data.data = remote_dep_copy_allocate(&deps->output[k].data.remote); } -#ifdef PARSEC_PROF_DRY_DEP - (void)dtt; (void)nbdtt; (void)msg; (void)from; - /* Removing the corresponding bit prevent the sending of the GET_DATA request */ - remote_dep_mpi_get_end(es, k, deps); - deps->incoming_mask ^= (1U<output[k].data.remote.dst_datatype; nbdtt = deps->output[k].data.remote.dst_count; + + /* We have the remote mem_handle. + * Let's allocate our mem_reg_handle + * and let the source know. 
+ */ + parsec_ce_mem_reg_handle_t receiver_memory_handle; + size_t receiver_memory_handle_size; + + if(parsec_ce.capabilites.supports_noncontiguous_datatype) { + parsec_ce.mem_register(PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), PARSEC_MEM_TYPE_NONCONTIGUOUS, + nbdtt, dtt, + -1, + &receiver_memory_handle, &receiver_memory_handle_size); + } else { + /* TODO: Implement converter to pack and unpack */ + int dtt_size; + parsec_type_size(dtt, &dtt_size); + parsec_ce.mem_register(PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), PARSEC_MEM_TYPE_CONTIGUOUS, + -1, NULL, + dtt_size, + &receiver_memory_handle, &receiver_memory_handle_size); + + } + # if defined(PARSEC_DEBUG_NOISIER) MPI_Type_get_name(dtt, type_name, &len); - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type %s (%p) count %d displ %ld extent %d\t(tag=%d)", + int _size; + MPI_Type_size(dtt, &_size); + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "MPI:\tTO\t%d\tGet START\t% -8s\tk=%d\twith datakey %lx at %p type %s count %d displ %ld \t(k=%d, dst_mem_handle=%p)", from, tmp, k, task->deps, PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data), type_name, dtt, nbdtt, - deps->output[k].data.remote.dst_displ, deps->output[k].data.remote.arena->elem_size * nbdtt, msg.tag+k); + deps->output[k].data.remote.dst_displ, k, receiver_memory_handle); # endif - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_pldr_sk, k, from, - es->virtual_process->parsec_context->my_rank, deps->msg, - nbdtt, dtt, dep_comm); - DEBUG_MARK_DTA_MSG_START_RECV(from, deps->output[k].data.data, msg.tag+k); - MPI_Irecv((char*)PARSEC_DATA_COPY_GET_PTR(deps->output[k].data.data) + deps->output[k].data.remote.dst_displ, nbdtt, - dtt, from, msg.tag + k, dep_comm, - &array_of_requests[parsec_comm_last_active_req]); - parsec_comm_callback_t* cb = &array_of_callbacks[parsec_comm_last_active_req]; - cb->fct = remote_dep_mpi_get_end_cb; - cb->cb_data = (void*)deps; - cb->idx = k; - 
parsec_comm_last_active_req++; + + callback_data->memory_handle = receiver_memory_handle; + + /* We need multiple information to be passed to the callback_fn we have assigned above. + * We pack the pointer to this callback_data and pass to the other side so we can complete + * cleanup and take necessary action when the data is available on our side */ + msg.remote_callback_data = (remote_dep_datakey_t)callback_data; + + /* We pack the static message(remote_dep_wire_get_t) and our memory_handle and send this message + * to the source. Source is anticipating this exact configuration. + */ + int buf_size = sizeof(remote_dep_wire_get_t) + receiver_memory_handle_size; + void *buf = malloc(buf_size); + memcpy( buf, + &msg, + sizeof(remote_dep_wire_get_t) ); + memcpy( ((char*)buf) + sizeof(remote_dep_wire_get_t), + receiver_memory_handle, + receiver_memory_handle_size ); + + /* Send AM */ + parsec_ce.send_am(&parsec_ce, REMOTE_DEP_GET_DATA_TAG, from, buf, buf_size); + TAKE_TIME(es->es_profile, MPI_Data_ctl_ek, event_id); + + free(buf); + parsec_comm_gets++; - assert(parsec_comm_last_active_req <= DEP_NB_REQ); -#endif - } -#if !defined(PARSEC_PROF_DRY_DEP) - if(msg.output_mask) { - TAKE_TIME_WITH_INFO(es->es_profile, MPI_Data_ctl_sk, get, - from, es->virtual_process->parsec_context->my_rank, - (*task), datakey_count, datakey_dtt, dep_comm); - MPI_Send(&msg, datakey_count, datakey_dtt, from, - REMOTE_DEP_GET_DATA_TAG, dep_comm); - TAKE_TIME(es->es_profile, MPI_Data_ctl_ek, get++); - DEBUG_MARK_CTL_MSG_GET_SENT(from, (void*)&msg, &msg); } -#endif /* !defined(PARSEC_PROF_DRY_DEP) */ } static void remote_dep_mpi_get_end(parsec_execution_stream_t* es, @@ -2515,21 +2100,114 @@ static void remote_dep_mpi_get_end(parsec_execution_stream_t* es, } static int -remote_dep_mpi_get_end_cb(parsec_execution_stream_t* es, - parsec_comm_callback_t* cb, - MPI_Status* status) +remote_dep_mpi_get_end_cb(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, 
+ void *cb_data) { - parsec_remote_deps_t* deps = (parsec_remote_deps_t*)cb->cb_data; + (void) ce; (void) tag; (void) msg_size; (void) cb_data; (void) src; + parsec_execution_stream_t* es = &parsec_comm_es; + + /* We send 8 bytes to the source to give it back to us when the PUT is completed, + * let's retrieve that + */ + uintptr_t *retrieve_pointer_to_callback = (uintptr_t *)msg; + remote_dep_cb_data_t *callback_data = (remote_dep_cb_data_t *)*retrieve_pointer_to_callback; + parsec_remote_deps_t *deps = (parsec_remote_deps_t *)callback_data->deps; + #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif - PARSEC_DEBUG_VERBOSE(10, parsec_comm_output_stream, "MPI:\tFROM\t%d\tGet END \t% -8s\tk=%d\twith datakey na \tparams %lx\t(tag=%d)", - status->MPI_SOURCE, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), - cb->idx, deps->incoming_mask, status->MPI_TAG); (void)status; - DEBUG_MARK_DTA_MSG_END_RECV(status->MPI_TAG); - TAKE_TIME(es->es_profile, MPI_Data_pldr_ek, cb->idx); - remote_dep_mpi_get_end(es, cb->idx, deps); + PARSEC_DEBUG_VERBOSE(6, parsec_debug_output, "MPI:\tFROM\t%d\tGet END \t% -8s\tk=%d\twith datakey na \tparams %lx\t(tag=%d)", + src, remote_dep_cmd_to_string(&deps->msg, tmp, MAX_TASK_STRLEN), + callback_data->k, deps->incoming_mask, src); + + + TAKE_TIME(es->es_profile, MPI_Data_pldr_ek, callback_data->k); + remote_dep_mpi_get_end(es, callback_data->k, deps); + + parsec_ce.mem_unregister(&callback_data->memory_handle); + parsec_thread_mempool_free(parsec_remote_dep_cb_data_mempool->thread_mempools, callback_data); + parsec_comm_gets--; + + return 1; +} + +static int +remote_dep_ce_init(parsec_context_t* context) +{ + int rc; + /* Do this first to give a chance to the communication engine to define + * who this process is by setting the corresponding info in the + * parsec_context. + */ + if( NULL == parsec_comm_engine_init(context) ) { + parsec_warning("Communication engine failed to start. 
Additional information might be available in the corresponding error message"); + return PARSEC_ERR_NOT_FOUND; + } + + PARSEC_OBJ_CONSTRUCT(&dep_activates_fifo, parsec_list_t); + PARSEC_OBJ_CONSTRUCT(&dep_activates_noobj_fifo, parsec_list_t); + PARSEC_OBJ_CONSTRUCT(&dep_put_fifo, parsec_list_t); + + parsec_mpi_same_pos_items_size = context->nb_nodes + (int)DEP_LAST; + parsec_mpi_same_pos_items = (dep_cmd_item_t**)calloc(parsec_mpi_same_pos_items_size, + sizeof(dep_cmd_item_t*)); + + /* Register Persistant requests */ + rc = parsec_ce.tag_register(REMOTE_DEP_ACTIVATE_TAG, remote_dep_mpi_save_activate_cb, context, + DEP_SHORT_BUFFER_SIZE * sizeof(char)); + if( PARSEC_SUCCESS != rc ) { + parsec_warning("[CE] Failed to register communication tag REMOTE_DEP_ACTIVATE_TAG (error %d)\n", rc); + parsec_comm_engine_fini(&parsec_ce); + return rc; + } + rc = parsec_ce.tag_register(REMOTE_DEP_GET_DATA_TAG, remote_dep_mpi_save_put_cb, context, + 4096); + if( PARSEC_SUCCESS != rc ) { + parsec_warning("[CE] Failed to register communication tag REMOTE_DEP_GET_DATA_TAG (error %d)\n", rc); + parsec_ce.tag_unregister(REMOTE_DEP_ACTIVATE_TAG); + parsec_comm_engine_fini(&parsec_ce); + return rc; + } + + parsec_remote_dep_cb_data_mempool = (parsec_mempool_t*) malloc (sizeof(parsec_mempool_t)); + parsec_mempool_construct(parsec_remote_dep_cb_data_mempool, + PARSEC_OBJ_CLASS(remote_dep_cb_data_t), sizeof(remote_dep_cb_data_t), + offsetof(remote_dep_cb_data_t, mempool_owner), + 1); + + remote_dep_mpi_profiling_init(); return 0; } + +static int +remote_dep_ce_fini(parsec_context_t* context) +{ + remote_dep_mpi_profiling_fini(); + + // Unregister tags + parsec_ce.tag_unregister(REMOTE_DEP_ACTIVATE_TAG); + parsec_ce.tag_unregister(REMOTE_DEP_GET_DATA_TAG); + //parsec_ce.tag_unregister(REMOTE_DEP_PUT_END_TAG); + + parsec_mempool_destruct(parsec_remote_dep_cb_data_mempool); + free(parsec_remote_dep_cb_data_mempool); + + free(parsec_mpi_same_pos_items); parsec_mpi_same_pos_items = NULL; + 
parsec_mpi_same_pos_items_size = 0; + + PARSEC_OBJ_DESTRUCT(&dep_activates_fifo); + PARSEC_OBJ_DESTRUCT(&dep_activates_noobj_fifo); + PARSEC_OBJ_DESTRUCT(&dep_put_fifo); + + parsec_comm_engine_fini(&parsec_ce); + + (void)context; + return 0; +} + diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index 90e5881a2..3b36c5a63 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -21,4 +21,22 @@ parsec_addtest_executable(C dtd_test_global_id_for_dc_assumed SOURCES dtd_test_g parsec_addtest_executable(C dtd_test_explicit_task_creation SOURCES dtd_test_explicit_task_creation.c) parsec_addtest_executable(C dtd_test_tp_enqueue_dequeue SOURCES dtd_test_tp_enqueue_dequeue.c) parsec_addtest_executable(C dtd_test_interleave_actions SOURCES dtd_test_interleave_actions.c) +<<<<<<< HEAD +======= +parsec_addtest_executable(C dtd_test_ce SOURCES dtd_test_ce.c) + +if( PARSEC_HAVE_CUDA ) + parsec_addtest_executable(C dtd_test_cuda_task_insert SOURCES dtd_test_cuda_task_insert.c) + if( TARGET CUDA::cublas ) + parsec_addtest_executable(C dtd_bench_simple_gemm SOURCES dtd_bench_simple_gemm.c) + target_link_libraries(dtd_bench_simple_gemm PRIVATE CUDA::cublas m) + find_package(BLAS) + if(BLAS_FOUND) + target_link_libraries(dtd_bench_simple_gemm PRIVATE ${BLAS_LIBRARIES}) + target_include_directories(dtd_bench_simple_gemm PRIVATE ${BLAS_INCLUDE_DIRS}) + target_compile_definitions(dtd_bench_simple_gemm PRIVATE HAVE_BLAS=1) + endif(BLAS_FOUND) + endif( TARGET CUDA::cublas ) +endif() +>>>>>>> Attempt to backport the revamp of the communication engine (#380) diff --git a/tests/dsl/dtd/dtd_test_ce.c b/tests/dsl/dtd/dtd_test_ce.c new file mode 100644 index 000000000..3edccb73c --- /dev/null +++ b/tests/dsl/dtd/dtd_test_ce.c @@ -0,0 +1,683 @@ +#include +#include +#include +#include +#include +#include +#include "parsec/parsec_comm_engine.h" + +#include "parsec/runtime.h" + +#define ACTIVE_MESSAGE_FROM_0_TAG 2 +#define ACTIVE_MESSAGE_FROM_1_TAG 3 
+#define NOTIFY_ABOUT_GET_FROM_0_TAG 4 +#define NOTIFY_ABOUT_PUT_FROM_0_TAG 5 +#define NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG 6 + +int +get_end(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data); + +int +put_end(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data); + +int +put_end_ack(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data); + +int counter = 0; +int my_rank; + +/* Tag 2 for int message */ +int +callback_tag_2(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) ce; (void) cb_data; + printf("[%d] In callback for tag %"PRIu64", message sent from %d size: %zu message: ", my_rank, tag, src, msg_size); + + int i, total = msg_size/sizeof(int); + + int *buffer = (int *)msg; + printf("["); + for(i = 0; i < total; i++) { + if(i == total - 1) { + printf("%d", buffer[i]); + } else { + printf("%d,", buffer[i]); + } + } + printf("]\n"); + counter++; + + return 1; +} + +/* Tag 3 for float message */ +int +callback_tag_3(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) ce; (void) cb_data; + printf("[%d] In callback for tag %"PRIu64", message sent from %d size: %zu message: ", my_rank, tag, src, msg_size); + + int i, total = msg_size/sizeof(float); + + float *buffer = (float *)msg; + printf("["); + for(i = 0; i < total; i++) { + if(i == total - 1) { + printf("%f", buffer[i]); + } else { + printf("%f,", buffer[i]); + } + } + printf("]\n"); + counter++; + + return 1; +} + +typedef struct handshake_info_s { + int buf_size; + uintptr_t cb_fn; +} handshake_info_t; + + +/* Active Message for GET notification.
+ * This function will be triggered in rank 1 after rank 0 + * sends an active message to 1 informing about a GET that + * 1 needs to complete. + */ +int +notify_about_get(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) tag; (void) cb_data; (void) msg_size; + assert(my_rank == 1); + + handshake_info_t *GET_activation_message = (handshake_info_t *) msg; + + /* We have the remote memory_handle. + * Let's allocate the local memory_handle + * and let's start the GET. + */ + parsec_ce_mem_reg_handle_t rank_1_memory_handle; + size_t rank_1_memory_handle_size; + /* GET operation will store the actual data in the following buffer */ + int *receive_buf = malloc(sizeof(int) * GET_activation_message->buf_size); + + if(ce->capabilites.supports_noncontiguous_datatype) { + parsec_datatype_t *datatype = malloc(sizeof(parsec_datatype_t)); + parsec_type_create_contiguous(GET_activation_message->buf_size, parsec_datatype_int_t, datatype); + ce->mem_register(receive_buf, PARSEC_MEM_TYPE_NONCONTIGUOUS, + 1, *datatype, + -1, + &rank_1_memory_handle, &rank_1_memory_handle_size); + } else { + ce->mem_register(receive_buf, PARSEC_MEM_TYPE_CONTIGUOUS, + -1, PARSEC_DATATYPE_NULL, + sizeof(int) * GET_activation_message->buf_size, + &rank_1_memory_handle, &rank_1_memory_handle_size); + } + + parsec_ce_mem_reg_handle_t rank_0_memory_handle = (parsec_ce_mem_reg_handle_t)(((char *)GET_activation_message) + sizeof(handshake_info_t)); + + /* Let's start the GET */ + ce->get(ce, rank_1_memory_handle, 0, rank_0_memory_handle, 0, + 0, src, + get_end, (void *) ce, + GET_activation_message->cb_fn, rank_0_memory_handle, ce->get_mem_handle_size()); + + counter++; + + return 1; +} + +/* This will be called in rank 1 when GET is done */ +int +get_end(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data) +{ + 
(void) ldispl; (void) rdispl; (void) size; (void) remote; (void) cb_data; (void) rreg; + + void *mem; + int mem_size; + int count; + parsec_datatype_t datatype; + + ce->mem_retrieve(lreg, &mem, &datatype, &count); + + parsec_type_size(datatype, &mem_size); + + printf("[%d] GET is over, message:\n[", my_rank); + int *receive_buf = (int *)mem; + int i; + int total = (int)(mem_size/(sizeof(int))); + for(i = 0; i < total; i++) { + if(i == total-1) { + printf("%d", receive_buf[i]); + } else { + printf("%d,", receive_buf[i]); + } + } + printf("]\n"); + + parsec_ce_mem_reg_handle_t rank_1_memory_handle = lreg; + ce->mem_unregister(&rank_1_memory_handle); + + free(receive_buf); + + counter++; + + return 1; +} + +/* This function was passed from rank 0 to rank 1 as the notification + * function to be called when the GET is over for clean up on rank 0. + * The memory_handle of rank 0 was also sent to rank 1 to be sent back + * as the callback data. + */ +int +get_end_ack(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) tag; (void) msg_size; (void) src; (void) cb_data; + parsec_ce_mem_reg_handle_t rank_0_memory_handle = (parsec_ce_mem_reg_handle_t) msg; + + /* cb_data is the data passed while this function was registered with the lower + * level comm. engine.
*/ + void *mem; + int count; + parsec_datatype_t datatype; + + ce->mem_retrieve(rank_0_memory_handle, &mem, &datatype, &count); + + printf("[%d] Notification of GET over received\n", my_rank); + int *send_buf = (int *)mem; + + free(send_buf); + + ce->mem_unregister(&rank_0_memory_handle); + + counter++; + + return 1; +} + +/* Rank 0 send an active message to rank 1 + * notifying about a PUT + */ +int +notify_about_put(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) tag; (void) src; (void) cb_data; (void) msg_size; + assert(my_rank == 1); + + handshake_info_t *PUT_activation_message = (handshake_info_t *) msg; + + parsec_ce_mem_reg_handle_t rank_1_memory_handle; + size_t rank_1_memory_handle_size; + int *receive_buf = malloc(sizeof(int) * PUT_activation_message->buf_size); + + /* We have the remote mem_reg_handle. + * Let's allocate the local mem_reg_handle + * and send both to other side to start a PUT. + */ + + if(ce->capabilites.supports_noncontiguous_datatype) { + parsec_datatype_t *datatype = malloc(sizeof(parsec_datatype_t)); + parsec_type_create_contiguous(PUT_activation_message->buf_size, parsec_datatype_int_t, datatype); + ce->mem_register(receive_buf, PARSEC_MEM_TYPE_NONCONTIGUOUS, + 1, *datatype, + -1, + &rank_1_memory_handle, &rank_1_memory_handle_size); + } else { + ce->mem_register(receive_buf, PARSEC_MEM_TYPE_CONTIGUOUS, + -1, PARSEC_DATATYPE_NULL, + sizeof(int) * PUT_activation_message->buf_size, + &rank_1_memory_handle, &rank_1_memory_handle_size); + } + + handshake_info_t handshake_info; + handshake_info.buf_size = 0; + handshake_info.cb_fn = (uintptr_t) put_end_ack; + + /* Rank 1 has rank 0's memory_handle, it will pack both 0's and it's own + * memory_handle and send it to rank 0. After receiving this message, 0 will + * be able to perform a PUT. 
+ */ + int PUT_forward_mem_handle_message_size = sizeof(handshake_info_t) + (2 * ce->get_mem_handle_size()); + void *PUT_forward_mem_handle_message = malloc(PUT_forward_mem_handle_message_size); + + /* pack the handshake_info_t */ + memcpy( PUT_forward_mem_handle_message, + &handshake_info, + sizeof(handshake_info_t) ); + /* pack rank 0's memory_handle */ + memcpy( ((char *) PUT_forward_mem_handle_message) + sizeof(handshake_info_t), + ((char *) PUT_activation_message) + sizeof(handshake_info_t), + ce->get_mem_handle_size() ); + /* pack rank 1's memory_handle */ + memcpy( ((char *) PUT_forward_mem_handle_message) + sizeof(handshake_info_t) + ce->get_mem_handle_size(), + rank_1_memory_handle, + ce->get_mem_handle_size() ); + + ce->send_am(ce, NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG, 0, PUT_forward_mem_handle_message, PUT_forward_mem_handle_message_size); + + free(PUT_forward_mem_handle_message); + + counter++; + + return 1; +} + +/* This function is called in rank 0 when rank 1 has received the activation message for + * a PUT and is prepared the buffers for rank 0 to complete the PUT. 
+ */ +int +put_ack_am(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) tag; (void) cb_data; (void) msg_size; + assert(my_rank == 0); + + handshake_info_t *PUT_forward_mem_handle_message = (handshake_info_t *) msg; + + parsec_ce_mem_reg_handle_t rank_0_memory_handle = ((char *)PUT_forward_mem_handle_message) + + sizeof(handshake_info_t); + parsec_ce_mem_reg_handle_t rank_1_memory_handle = ((char *)PUT_forward_mem_handle_message) + + sizeof(handshake_info_t) + + ce->get_mem_handle_size(); + + printf("[%d] Received the remote mem_reg_handle and now can start the PUT\n", my_rank); + + ce->put(ce, rank_0_memory_handle, 0, + rank_1_memory_handle, 0, + 0, src, + put_end, NULL, + (uintptr_t) PUT_forward_mem_handle_message->cb_fn, (void *)rank_1_memory_handle, ce->get_mem_handle_size()); + + counter++; + + return 1; +} + +/* This function will be called once the PUT is over in rank 0 */ +int +put_end(parsec_comm_engine_t *ce, + parsec_ce_mem_reg_handle_t lreg, + ptrdiff_t ldispl, + parsec_ce_mem_reg_handle_t rreg, + ptrdiff_t rdispl, + size_t size, + int remote, + void *cb_data) +{ + (void) ldispl; (void) rdispl; (void) size; (void) remote; (void) cb_data; (void) rreg; + printf("[%d] PUT is finished\n", my_rank); + + parsec_ce_mem_reg_handle_t rank_0_memory_handle = lreg; + + void *send_buf; + + int count; + parsec_datatype_t datatype; + + ce->mem_retrieve(rank_0_memory_handle, &send_buf, &datatype, &count); + + free(send_buf); + + ce->mem_unregister(&rank_0_memory_handle); + + counter++; + + return 1; +} + +/* This function is called to notify rank 1 that PUT by 0 is over */ +int +put_end_ack(parsec_comm_engine_t *ce, + parsec_ce_tag_t tag, + void *msg, + size_t msg_size, + int src, + void *cb_data) +{ + (void) tag; (void) msg_size; (void) src; (void)cb_data; + parsec_ce_mem_reg_handle_t rank_1_memory_handle = (parsec_ce_mem_reg_handle_t) msg; + + void *mem; + int mem_size; + + int count; + 
parsec_datatype_t datatype; + ce->mem_retrieve(rank_1_memory_handle, &mem, &datatype, &count); + + parsec_type_size(datatype, &mem_size); + + printf("[%d] PUT is over, message:\n[", my_rank); + + int *receive_buf = (int *)mem; + int i; + int total = (int)(mem_size/(sizeof(int))); + for(i = 0; i < total; i++) { + if(i == total-1) { + printf("%d", receive_buf[i]); + } else { + printf("%d,", receive_buf[i]); + } + } + printf("]\n"); + + free(receive_buf); + + ce->mem_unregister(&rank_1_memory_handle); + + counter++; + + return 1; +} + +int main(int argc, char **argv) +{ + int rank, world; + int i; + int test_GET = 1; + int test_PUT = 1; + +#if defined(PARSEC_HAVE_MPI) + { + int provided; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); + } + MPI_Comm_size(MPI_COMM_WORLD, &world); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#else + world = 1; + rank = 0; +#endif + + my_rank = rank; + + parsec_comm_engine_t *ce = parsec_comm_engine_init(NULL); + + if( world != 2 ) { + printf("World is too small, too bad! 
Buh-bye"); + return 0; + } + + ce->tag_register(ACTIVE_MESSAGE_FROM_0_TAG, callback_tag_2, ce, 4096); + ce->tag_register(ACTIVE_MESSAGE_FROM_1_TAG, callback_tag_3, ce, 4096); + + /* Active message for GET notification */ + ce->tag_register(NOTIFY_ABOUT_GET_FROM_0_TAG, notify_about_get, ce, 4096); + + /* Active message for PUT notification */ + ce->tag_register(NOTIFY_ABOUT_PUT_FROM_0_TAG, notify_about_put, ce, 4096); + ce->tag_register(NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG, put_ack_am, ce, 4096); + + if(ce->capabilites.sided == 1) { + /* This is true onesided and we need to register more tags for notifications */ + ce->tag_register((parsec_ce_tag_t)(uintptr_t)get_end_ack, get_end_ack, ce, 4096); + ce->tag_register((parsec_ce_tag_t)(uintptr_t)put_end_ack, put_end_ack, ce, 4096); + } + + /* To make sure all the ranks have the tags registered */ + MPI_Barrier(MPI_COMM_WORLD); + + /* Testing active message */ + if(rank == 0) { + int *intbuffer = NULL; + intbuffer = malloc(3*sizeof(int)); + intbuffer[0] = 10; + intbuffer[1] = 11; + intbuffer[2] = 12; + + printf("[%d] Sending same active message twice to 1, message: [%d,%d,%d]\n", + my_rank, intbuffer[0], intbuffer[1], intbuffer[2]); + + ce->send_am(ce, ACTIVE_MESSAGE_FROM_0_TAG, 1, intbuffer, 3*sizeof(int)); + ce->send_am(ce, ACTIVE_MESSAGE_FROM_0_TAG, 1, intbuffer, 3*sizeof(int)); + + free(intbuffer); + } + if(rank == 1) { + while(counter != 2) { + ce->progress(ce); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + counter = 0; + printf("-------------------------------------\n"); + + if(rank == 1) { + float *floatbuffer = NULL; + floatbuffer = malloc(2*sizeof(float)); + floatbuffer[0] = 9.5; + floatbuffer[1] = 19.5; + + printf("[%d] Sending same active message twice to 0, message: [%f,%f]\n", + my_rank, floatbuffer[0], floatbuffer[1]); + + ce->send_am(ce, ACTIVE_MESSAGE_FROM_1_TAG, 0, floatbuffer, 2*sizeof(float)); + ce->send_am(ce, ACTIVE_MESSAGE_FROM_1_TAG, 0, floatbuffer, 2*sizeof(float)); + + free(floatbuffer); + } + if(rank 
== 0) { + while(counter != 2) { + ce->progress(ce); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + counter = 0; + printf("-------------------------------------\n"); + + if(test_GET) { + /* Let's test Get from 1 -> 0 (1 gets from 0) */ + if(rank == 0) { + parsec_ce_mem_reg_handle_t rank_0_memory_handle; + size_t rank_0_memory_handle_size; + int buf_size = 9; + int *send_buf = malloc(sizeof(int) * buf_size); + + for(i = 0; i < buf_size; i++) { + send_buf[i] = i; + } + + /* Registering a memory with a mem_reg_handle */ + if(ce->capabilites.supports_noncontiguous_datatype) { + parsec_datatype_t *datatype = malloc(sizeof(parsec_datatype_t)); + parsec_type_create_contiguous(buf_size, parsec_datatype_int_t, datatype); + ce->mem_register(send_buf, PARSEC_MEM_TYPE_NONCONTIGUOUS, + 1, *datatype, + -1, + &rank_0_memory_handle, &rank_0_memory_handle_size); + } else { + ce->mem_register(send_buf, PARSEC_MEM_TYPE_CONTIGUOUS, + -1, PARSEC_DATATYPE_NULL, + buf_size, + &rank_0_memory_handle, &rank_0_memory_handle_size); + } + + printf("[%d] Starting a GET (1 will GET from 0), message:\n[", my_rank); + for(i = 0; i < buf_size; i++) { + if(i == buf_size - 1) { + printf("%d", send_buf[i]); + } else { + printf("%d,", send_buf[i]); + } + } + printf("]\n"); + + handshake_info_t handshake_info; + handshake_info.buf_size = buf_size; + handshake_info.cb_fn = (uintptr_t) get_end_ack; + + /* Actual message sent from 0 to 1 will contain handshake_info_t and + * a copy of the local_memory_handle of rank 0. 
+ */ + int GET_activation_message_size = sizeof(handshake_info_t) + rank_0_memory_handle_size; + void *GET_activation_message = malloc(GET_activation_message_size); + memcpy( GET_activation_message, + &handshake_info, + sizeof(handshake_info_t) ); + memcpy( ((char *)GET_activation_message) + sizeof(handshake_info_t), + rank_0_memory_handle, + rank_0_memory_handle_size ); + + /* 0 lets 1 know that it has some data for 1 to get */ + ce->send_am(ce, NOTIFY_ABOUT_GET_FROM_0_TAG, 1, GET_activation_message, GET_activation_message_size); + + free(GET_activation_message); + + while(counter != 1) { + ce->progress(ce); + } + } + + if(rank == 1) { + while(counter != 2) { + ce->progress(ce); + } + } + + } + + MPI_Barrier(MPI_COMM_WORLD); + counter = 0; + + if(test_PUT) { + printf("-------------------------------------\n"); + /* Let's test PUT from 0 -> 1 (0 puts in 1) */ + if(rank == 0) { + parsec_ce_mem_reg_handle_t rank_0_memory_handle; + size_t rank_0_memory_handle_size; + + int buf_size = 9; + int *send_buf = malloc(sizeof(int) * buf_size); + for(i = 0; i < buf_size; i++) { + send_buf[i] = i * 2; + } + + if(ce->capabilites.supports_noncontiguous_datatype) { + parsec_datatype_t *datatype = malloc(sizeof(parsec_datatype_t)); + parsec_type_create_contiguous(buf_size, parsec_datatype_int_t, datatype); + ce->mem_register(send_buf, PARSEC_MEM_TYPE_NONCONTIGUOUS, + 1, *datatype, + -1, + &rank_0_memory_handle, &rank_0_memory_handle_size); + } else { + ce->mem_register(send_buf, PARSEC_MEM_TYPE_CONTIGUOUS, + -1, PARSEC_DATATYPE_NULL, + buf_size, + &rank_0_memory_handle, &rank_0_memory_handle_size); + } + + printf("[%d] Starting a PUT (0 will PUT in 1), message:\n[", my_rank); + for(i = 0; i < buf_size; i++) { + if(i == buf_size -1) { + printf("%d", send_buf[i]); + } else { + printf("%d,", send_buf[i]); + } + } + printf("]\n"); + + handshake_info_t handshake_info; + handshake_info.buf_size = buf_size; + handshake_info.cb_fn = 0; + + int PUT_activation_message_size = 
sizeof(handshake_info_t) + rank_0_memory_handle_size; + void *PUT_activation_message = malloc(PUT_activation_message_size); + + memcpy( PUT_activation_message, + &handshake_info, + sizeof(handshake_info_t) ); + memcpy( ((char *)PUT_activation_message) + sizeof(handshake_info_t), + rank_0_memory_handle, + rank_0_memory_handle_size ); + + /* 0 lets 1 know that it has the data for 1 */ + ce->send_am(ce, NOTIFY_ABOUT_PUT_FROM_0_TAG, 1, PUT_activation_message, PUT_activation_message_size); + + free(PUT_activation_message); + + while(counter != 2) { + ce->progress(ce); + } + } + + if(rank == 1) { + while(counter != 2) + ce->progress(ce); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + + ce->tag_unregister(ACTIVE_MESSAGE_FROM_0_TAG); + ce->tag_unregister(ACTIVE_MESSAGE_FROM_1_TAG); + ce->tag_unregister(NOTIFY_ABOUT_GET_FROM_0_TAG); + ce->tag_unregister(NOTIFY_ABOUT_PUT_FROM_0_TAG); + ce->tag_unregister(NOTIFY_ABOUT_MEM_HANDLE_FROM_1_TAG); + + parsec_comm_engine_fini(ce); + +#ifdef PARSEC_HAVE_MPI + MPI_Finalize(); +#endif + + return 0; +} From b15d00488d03cac2140e6b4fa78514e939a93464 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 3 Jun 2022 18:42:15 -0400 Subject: [PATCH 092/215] Respect DESTDIR if provided. We generate a script to be used during the install step, and we need to make sure it uses the DESTDIR provided at install (instead of using the one provided at build time). Signed-off-by: George Bosilca --- tools/profiling/python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/profiling/python/CMakeLists.txt b/tools/profiling/python/CMakeLists.txt index 548770065..63d7e89c9 100644 --- a/tools/profiling/python/CMakeLists.txt +++ b/tools/profiling/python/CMakeLists.txt @@ -82,7 +82,7 @@ add_custom_target(pbt2ptt ALL DEPENDS ${OUTPUT}) # Call python distutils to install all python support in the right location # (aka. according to the OS demands).
Prepare to reconfigure the shell # helper scripts to point to the right location -install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} ${SETUP_PY} install --skip-build --prefix=${CMAKE_INSTALL_PREFIX} +install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} ${SETUP_PY} install --skip-build --prefix=\$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${PYTHON_TOOLS_BIN_DIR})") # Create bash environment PaRSEC python support @@ -104,7 +104,7 @@ foreach(file ${pyfiles}) PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) get_filename_component(filenoext "${file}" NAME_WE) get_filename_component(filenodir "${file}" NAME) - install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_INSTALL_PREFIX}/${PARSEC_INSTALL_LIBEXECDIR}/parsec/${filenodir} ${CMAKE_INSTALL_PREFIX}/${PARSEC_INSTALL_BINDIR}/${filenoext})") + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink \$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/${PARSEC_INSTALL_LIBEXECDIR}/parsec/${filenodir} \$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/${PARSEC_INSTALL_BINDIR}/${filenoext})") endforeach() set(PARSEC_PYTHON_TOOLS ON CACHE BOOL "True if Python tools are enabled in PaRSEC") From a870f8d3c2fea7b7ab446a7848dfa34401509e4a Mon Sep 17 00:00:00 2001 From: Omri Mor Date: Tue, 7 Jun 2022 16:46:39 -0400 Subject: [PATCH 093/215] [RFC] profiling tools: more efficient cross-stream event matching (#372) * profiling tools: remove unused `start' arg in matching functions Relevant functions: * dbp_iterator_move_to_matching_event * dbp_iterator_find_matching_event_all_threads These two functions are only ever called with `start' == 0. Matching an end event with a start event doesn't work right anyway and is not supported. Remove this functionality. 
Signed-off-by: Omri Mor * profiling tools: remove duplicated check for profile buffer size Signed-off-by: Omri Mor * profiling tools: pass dbp_file_t* to refer_events_buffer, not fd This change was made in preparation for more easily supporting a mmap-once implementation, but this has been left for later. Signed-off-by: Omri Mor * profiling tools: factor out dbp_iterator_set_offset, dbp_iterator_next_buffer Signed-off-by: Omri Mor * profiling tools: factor out dbp_events_match Signed-off-by: Omri Mor * profiling tools: misc minor changes and cleanup Due to structure alignment, it's better to put members with greater alignment restrictions at the beginning of a structure and group smaller members at the end. In practice though, this particular change probably does very little. Signed-off-by: Omri Mor * profiling tools: more efficient cross-stream event matching Performance when matching events that start in one profiling stream and end in a different one is abysmal: O(m * n) in the worst case for m such events and n total events. This has largely not been a problem, since the only time this occurs is when using GPUs, and there are relatively few such events. In trying to profile my ongoing work with LCI, this is no longer the case. Communication events almost always start in the progress thread and end in the communication thread, or vice-versa. This results in many such events, so the current behavior is infeasible. This code relies on an observation by George Bosilca that most start events are immediately followed by their end events. This means it is possible to construct a list of end events that are possible matches for cross-stream events. If the matching event for a start event is not found in its own stream, this list can be searched to determine where to start the search in other streams. Since the list is naturally sorted by timestamp, a binary search can be used. 
I rely on bsearch and a slight hack that stores the last array element in the search key to implement a "fuzzy" timestamp search on a per-event-key basis. This list is constructed lazily when it is needed and only once per stream. Constructing the cache is O(n). Search performance should be O(m * log m) for most cases. There could be some edge cases where the search finds a potential match in a stream, but the true match is on a different stream; in this case, the current code still traverses all possibly-matching events in the mistaken stream. There isn't much to be done about this, as we cannot know that the matching event is not in the stream until it has been traversed in its entirety. Signed-off-by: Omri Mor --- tools/profiling/dbp2xml.c | 2 +- tools/profiling/dbpreader.c | 381 ++++++++++++++++++++++------- tools/profiling/dbpreader.h | 4 +- tools/profiling/python/pbt2ptt.pxd | 2 +- tools/profiling/python/pbt2ptt.pyx | 2 +- 5 files changed, 300 insertions(+), 91 deletions(-) diff --git a/tools/profiling/dbp2xml.c b/tools/profiling/dbp2xml.c index ec394fc75..b21a966f7 100644 --- a/tools/profiling/dbp2xml.c +++ b/tools/profiling/dbp2xml.c @@ -78,7 +78,7 @@ static void dump_one_xml(FILE *tracefile, const dbp_file_t *file, int t) while( (e = dbp_iterator_current(it)) != NULL ) { if( KEY_IS_START( dbp_event_get_key(e) ) && (BASE_KEY( dbp_event_get_key(e) ) == k) ) { - m = dbp_iterator_find_matching_event_all_threads(it, 0); + m = dbp_iterator_find_matching_event_all_threads(it); if( NULL == m ) { WARNING(" Event of class %s id %"PRIu32":%"PRIu64" at %lu does not have a match anywhere\n", dbp_dictionary_name(dbp_file_get_dictionary(file, BASE_KEY(dbp_event_get_key(e)))), diff --git a/tools/profiling/dbpreader.c b/tools/profiling/dbpreader.c index 19fbcb716..5932765e7 100644 --- a/tools/profiling/dbpreader.c +++ b/tools/profiling/dbpreader.c @@ -16,6 +16,7 @@ #include #endif #include +#include #include #include @@ -69,8 +70,8 @@ typedef enum { struct dbp_file { 
struct dbp_multifile_reader *parent; char *hr_id; - int fd; char *filename; + int fd; int rank; int nb_infos; int nb_threads; @@ -214,11 +215,29 @@ int dbp_file_translate_local_dico_to_global(const dbp_file_t *file, int lid) (EVENT_HAS_INFO((dbp_event)->native) ? \ (dbp_object)->parent->dico_keys[(dbp_object)->dico_map[BASE_KEY((dbp_event)->native->event.key)]].keylen : 0)) +typedef struct { + uint64_t timestamp; + off_t offset; +} event_cache_item_t; + +typedef struct { + event_cache_item_t *items; + size_t len; + size_t size; +} event_cache_key_t; + +typedef struct { + pthread_mutex_t mtx; + event_cache_key_t *keys; + int done; +} event_cache_t; + struct dbp_thread { const parsec_profiling_stream_t *profile; dbp_file_t *file; - int nb_infos; dbp_info_t *infos; + event_cache_t cache; + int nb_infos; }; #if defined(PARSEC_PROFILING_USE_MMAP) @@ -232,10 +251,10 @@ static void release_events_buffer(parsec_profiling_buffer_t *buffer) } } -static parsec_profiling_buffer_t *refer_events_buffer( int fd, int64_t offset ) +static parsec_profiling_buffer_t *refer_events_buffer( const dbp_file_t *file, int64_t offset ) { parsec_profiling_buffer_t *res; - res = mmap(NULL, event_buffer_size, PROT_READ, MAP_SHARED, fd, offset); + res = mmap(NULL, event_buffer_size, PROT_READ, MAP_SHARED, file->fd, offset); if( MAP_FAILED == res ) return NULL; return res; @@ -248,14 +267,14 @@ static void release_events_buffer(parsec_profiling_buffer_t *buffer) free(buffer); } -static parsec_profiling_buffer_t *refer_events_buffer( int fd, int64_t offset ) +static parsec_profiling_buffer_t *refer_events_buffer( const dbp_file_t *file, int64_t offset ) { - off_t pos = lseek(fd, offset, SEEK_SET); + off_t pos = lseek(file->fd, offset, SEEK_SET); if( -1 == pos ) { return NULL; } parsec_profiling_buffer_t *res = (parsec_profiling_buffer_t*)malloc(event_buffer_size); - pos = read(fd, res, event_buffer_size); + pos = read(file->fd, res, event_buffer_size); if( pos <= 0 ) { free(res); res = NULL; @@ 
-289,7 +308,7 @@ dbp_event_iterator_t *dbp_iterator_new_from_iterator(const dbp_event_iterator_t res->current_event_position = it->current_event_position; res->current_event_index = it->current_event_index; res->current_buffer_position = it->current_buffer_position; - res->current_events_buffer = refer_events_buffer( it->thread->file->fd, res->current_buffer_position ); + res->current_events_buffer = refer_events_buffer( it->thread->file, res->current_buffer_position ); #ifndef _NDEBUG res->last_event_date = it->last_event_date; #endif @@ -308,7 +327,7 @@ const dbp_event_t *dbp_iterator_current(dbp_event_iterator_t *it) return &it->current_event; } -const dbp_event_t *dbp_iterator_first(dbp_event_iterator_t *it) +static const dbp_event_t *dbp_iterator_set_offset(dbp_event_iterator_t *it, off_t offset) { if( it->current_events_buffer != NULL ) { release_events_buffer( it->current_events_buffer ); @@ -316,9 +335,10 @@ const dbp_event_t *dbp_iterator_first(dbp_event_iterator_t *it) it->current_event.native = NULL; } - it->current_events_buffer = refer_events_buffer( it->thread->file->fd, it->thread->profile->first_events_buffer_offset ); - it->current_buffer_position = it->thread->profile->first_events_buffer_offset; + it->current_events_buffer = refer_events_buffer( it->thread->file, offset ); + it->current_buffer_position = offset; it->current_event_position = 0; + it->current_event_index = 0; if( it->current_events_buffer != NULL ) it->current_event.native = (parsec_profiling_output_t*)&(it->current_events_buffer->buffer[it->current_event_position]); else @@ -326,46 +346,81 @@ const dbp_event_t *dbp_iterator_first(dbp_event_iterator_t *it) return dbp_iterator_current(it); } -const dbp_event_t *dbp_iterator_next(dbp_event_iterator_t *it) +const dbp_event_t *dbp_iterator_first(dbp_event_iterator_t *it) +{ + return dbp_iterator_set_offset(it, it->thread->profile->first_events_buffer_offset); +} + +static const dbp_event_t *dbp_iterator_next_buffer(dbp_event_iterator_t 
*it) { - size_t elen; - parsec_profiling_output_t *current; off_t next_off; - current = it->current_event.native; - if( NULL == current ) + if( NULL == it->current_event.native ) return NULL; - elen = DBP_EVENT_LENGTH(&it->current_event, it->thread->file); assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); - if( it->current_event_index+1 >= it->current_events_buffer->this_buffer.nb_events ) { - next_off = it->current_events_buffer->next_buffer_file_offset; - release_events_buffer( it->current_events_buffer ); - it->current_event_position = 0; - it->current_event_index = 0; - it->current_events_buffer = refer_events_buffer( it->thread->file->fd, next_off ); - it->current_buffer_position = next_off; - - if( NULL == it->current_events_buffer ) { - it->current_event.native = NULL; - return NULL; - } else { - it->current_event.native = (parsec_profiling_output_t*)&(it->current_events_buffer->buffer[it->current_event_position]); - } + + next_off = it->current_events_buffer->next_buffer_file_offset; + release_events_buffer( it->current_events_buffer ); + it->current_event_position = 0; + it->current_event_index = 0; + it->current_events_buffer = refer_events_buffer( it->thread->file, next_off ); + it->current_buffer_position = next_off; + + if( NULL == it->current_events_buffer ) { + it->current_event.native = NULL; } else { - it->current_event_position += elen; it->current_event.native = (parsec_profiling_output_t*)&(it->current_events_buffer->buffer[it->current_event_position]); - it->current_event_index++; } + assert( it->current_event_position <= event_avail_space ); assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); + assert((it->current_event.native == NULL) || + (it->current_event.native->event.timestamp != 0)); - current = it->current_event.native; - assert((current == NULL) || - (current->event.timestamp != 0)); + return dbp_iterator_current(it); +} + +static const dbp_event_t 
*dbp_iterator_next_in_buffer(dbp_event_iterator_t *it) +{ + size_t elen; + + if( NULL == it->current_event.native ) + return NULL; + assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); + + if( it->current_event_index+1 >= it->current_events_buffer->this_buffer.nb_events ) { + it->current_event.native = NULL; + return NULL; + } + + elen = DBP_EVENT_LENGTH(&it->current_event, it->thread->file); + it->current_event_position += elen; + it->current_event.native = (parsec_profiling_output_t*)&(it->current_events_buffer->buffer[it->current_event_position]); + it->current_event_index++; + + assert( it->current_event_position <= event_avail_space ); + assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); + assert((it->current_event.native == NULL) || + (it->current_event.native->event.timestamp != 0)); return dbp_iterator_current(it); } +const dbp_event_t *dbp_iterator_next(dbp_event_iterator_t *it) +{ + size_t elen; + + if( NULL == it->current_event.native ) + return NULL; + assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); + + if( it->current_event_index+1 >= it->current_events_buffer->this_buffer.nb_events ) { + return dbp_iterator_next_buffer(it); + } + + return dbp_iterator_next_in_buffer(it); +} + const dbp_thread_t *dbp_iterator_thread(const dbp_event_iterator_t *it) { return it->thread; @@ -378,55 +433,211 @@ void dbp_iterator_delete(dbp_event_iterator_t *it) free(it); } +static inline int dbp_events_match(const dbp_event_t *s, const dbp_event_t *e) +{ + int s_key = dbp_event_get_key(s); + int e_key = dbp_event_get_key(e ); + return ( (KEY_IS_START(s_key) && KEY_IS_END(e_key)) && + (BASE_KEY( s_key) == BASE_KEY( e_key)) && + (dbp_event_get_event_id( s) == dbp_event_get_event_id( e)) && + (dbp_event_get_taskpool_id(s) == dbp_event_get_taskpool_id(e)) && + (dbp_event_get_timestamp( s) <= dbp_event_get_timestamp( e)) ); +} + +/* minimum allocation count for cache */ +#define 
EVENT_CACHE_MIN_ALLOC 64 +/* build a "cache" of events where the end event + * does not immediately follow the start event */ +static void build_unmatched_events_in_thread(dbp_thread_t *thr) +{ + const dbp_event_t *e1, *e2; + dbp_event_iterator_t *i1, *i2; + int key2; + uint64_t timestamp2; + + event_cache_key_t *cache_key; + event_cache_item_t *cache_item; + size_t cache_index; + + /* lock cache; we're modifying volatile state! */ + pthread_mutex_lock(&thr->cache.mtx); + if( thr->cache.done ) { + /* cache already built, we don't need to do anything */ + goto build_events_done; + } + + /* iterator 1 points to current event */ + i1 = dbp_iterator_new_from_thread( thr ); + e1 = dbp_iterator_current( i1 ); + + /* iterator 2 points to next event */ + i2 = dbp_iterator_new_from_thread( thr ); + e2 = dbp_iterator_next( i2 ); + + while( NULL != e2 ) { + key2 = dbp_event_get_key(e2); + + /* if e2 is end event, but e1 doesn't match, e2 not in expected order + * store e2 position in cache to lookup later for potential match */ + if( KEY_IS_END(key2) && !dbp_events_match(e1, e2) ) { + cache_key = &thr->cache.keys[BASE_KEY(key2)]; + + /* if len == 0, this is first mismatched event with this key */ + if( cache_key->len == 0 ) { + cache_key->items = malloc(sizeof(event_cache_item_t[EVENT_CACHE_MIN_ALLOC])); + cache_key->size = EVENT_CACHE_MIN_ALLOC; + } else { + /* if buffer offset of prior mismatched event is same as this, + * we don't need to store the same buffer offset, just update + * the timestamp; we iterate over the entire buffer anyway */ + cache_item = &cache_key->items[cache_key->len - 1]; + if( cache_item->offset == i2->current_buffer_position ) { + timestamp2 = dbp_event_get_timestamp(e2); + if( cache_item->timestamp < timestamp2 ) + cache_item->timestamp = timestamp2; + goto next_iteration; + } + } + + cache_index = cache_key->len++; + /* if index == size, we need to grow the array */ + if( cache_index == cache_key->size ) { + cache_key->size *= 2; + 
cache_key->items = realloc(cache_key->items, sizeof(event_cache_item_t[cache_key->size])); + } + + /* cache timestamp and buffer offset for this event + * note that we don't store the exact index of the event, + * so a consumer should make sure to search through the buffer */ + cache_item = &cache_key->items[cache_index]; + cache_item->timestamp = dbp_event_get_timestamp(e2); + cache_item->offset = i2->current_buffer_position; + } + + next_iteration: + /* advance both iterators */ + e1 = dbp_iterator_next( i1 ); + e2 = dbp_iterator_next( i2 ); + } + + dbp_iterator_delete( i1 ); + dbp_iterator_delete( i2 ); + + /* set cache to done - it doesn't need to be rebuilt */ + thr->cache.done = 1; + +build_events_done: + pthread_mutex_unlock(&thr->cache.mtx); +} + +typedef struct { + const dbp_event_t *ref; + const event_cache_item_t *last; +} bsearch_key_t; + +static int bsearch_compare(const void *key, const void *el) +{ + /* technically does Bad Thing (shouldn't modify key), but probably works */ + bsearch_key_t *bsearch_key = (bsearch_key_t *)key; + const event_cache_item_t *cache_item = (const event_cache_item_t *)el; + bsearch_key->last = cache_item; + if( dbp_event_get_timestamp(bsearch_key->ref) < cache_item->timestamp ) + return -1; + if( dbp_event_get_timestamp(bsearch_key->ref) > cache_item->timestamp ) + return 1; + return 0; +} + +static const event_cache_item_t *dbp_event_find_in_cache(dbp_thread_t *thr, + const dbp_event_t *ref) +{ + event_cache_key_t *cache_key; + bsearch_key_t bsearch_key = { ref, NULL }; + + /* ensure we have an unmatched event cache */ + build_unmatched_events_in_thread(thr); + + /* do binary search in cache of key for events at ref timestamp + * we throw away the results of the search, because it's very unlikely to + * find this EXACT timestamp; however, we use a side-effect of the search + * in the comparison function (bsearch_compare) to store the last item in + * the cache array that was considered; this is the "insertion point" for + 
* ref's timestamp and is the timestamp closest to ref's we could find, so + * we return it as the starting point for the subsequent search */ + cache_key = &thr->cache.keys[BASE_KEY(dbp_event_get_key(ref))]; + bsearch(&bsearch_key, cache_key->items, cache_key->len, + sizeof(event_cache_item_t), bsearch_compare); + + return bsearch_key.last; +} + int dbp_iterator_move_to_matching_event(dbp_event_iterator_t *pos, - const dbp_event_t *ref, - int start ) + const dbp_event_t *ref) { - const dbp_event_t *e; - uint64_t ref_eid = dbp_event_get_event_id(ref); - uint32_t ref_hid = dbp_event_get_taskpool_id(ref); - int ref_key = start ? - START_KEY(BASE_KEY(dbp_event_get_key(ref))) : - END_KEY( BASE_KEY(dbp_event_get_key(ref))); - - e = dbp_iterator_current( pos ); - while( NULL != e ) { - if( (dbp_event_get_taskpool_id(e) == ref_hid) && - (dbp_event_get_event_id(e) == ref_eid) && - (dbp_event_get_key(e) == ref_key) ) { - if( dbp_event_get_event_id(e) != 0 || - (dbp_event_get_timestamp(ref) <= dbp_event_get_timestamp(e)) ) { + const event_cache_item_t *cache_item; + const event_cache_key_t *cache_key; + const dbp_event_t *e; + + cache_item = dbp_event_find_in_cache( pos->thread, ref ); + cache_key = &thr->cache.keys[BASE_KEY(dbp_event_get_key(ref))]; + + assert(&cache_key->items[0] <= cache_item) + assert(&cache_key->items[cache_key->len] > cache_item) + + /* iterate over all cached buffers containing possible matches */ + while( cache_item < &cache_key->items[cache_key->len] ) { + /* set iterator for current cached buffer */ + e = dbp_iterator_set_offset(pos, cache_item->offset); + /* iterate over all events in buffer */ + while( NULL != e) { + if( dbp_events_match(ref, e) ) { return 1; - } else if ( dbp_event_get_event_id(e) != 0 ) { - WARNING("Event with ID %d appear in reverse order\n", - dbp_event_get_event_id(e)); } + e = dbp_iterator_next_in_buffer(pos); } - e = dbp_iterator_next( pos ); + cache_item++; } + + /* set iterator to past-the-end */ + 
dbp_iterator_set_offset(pos, (off_t)-1); return 0; } -dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(const dbp_event_iterator_t *pos, int start) +dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(const dbp_event_iterator_t *pos) { dbp_event_iterator_t *it; + dbp_thread_t *thr; const dbp_event_t *ref; + const dbp_event_t *e; dbp_file_t *dbp_file; - int th; + int tid; + dbp_file = pos->thread->file; ref = dbp_iterator_current((dbp_event_iterator_t *)pos); + + /* most start events are immediately followed by their end event */ it = dbp_iterator_new_from_iterator(pos); - if( dbp_iterator_move_to_matching_event(it, ref, start) ) + e = dbp_iterator_next(it); + /* e can be NULL if pos is last event in stream; there is no next event */ + if( (NULL != e) && dbp_events_match(ref, e) ) return it; dbp_iterator_delete(it); - dbp_file = pos->thread->file; + /* search through possibly matching events in this thread */ + it = dbp_iterator_new_from_thread( pos->thread ); + if( dbp_iterator_move_to_matching_event(it, ref) ) + return it; + dbp_iterator_delete(it); - for(th = dbp_file_nb_threads(dbp_file)-1; th>=0; th--) { - if( pos->thread == dbp_file_get_thread(dbp_file, th) ) + /* try other threads */ + for( tid = 0; tid < dbp_file_nb_threads(dbp_file); tid++) { + thr = dbp_file_get_thread(dbp_file, tid); + /* skip same thread */ + if( pos->thread == thr ) continue; - it = dbp_iterator_new_from_thread( dbp_file_get_thread(dbp_file, th) ); - if( dbp_iterator_move_to_matching_event(it, ref, start) ) + /* same logic as above */ + it = dbp_iterator_new_from_thread( thr ); + if( dbp_iterator_move_to_matching_event(it, ref) ) return it; dbp_iterator_delete(it); } @@ -562,7 +773,7 @@ static void read_infos(dbp_file_t *dbp, parsec_profiling_binary_file_header_t *h dbp->infos = (dbp_info_t**)malloc(sizeof(dbp_info_t*) * dbp->nb_infos); - info = refer_events_buffer(dbp->fd, head->info_offset ); + info = refer_events_buffer(dbp, head->info_offset ); if( 
NULL == info ) { fprintf(stderr, "Unable to read first info at offset %"PRId64": %d general file info in '%s' lost\n", head->info_offset, dbp->nb_infos, dbp->filename); @@ -599,7 +810,7 @@ static void read_infos(dbp_file_t *dbp, parsec_profiling_binary_file_header_t *h pos += tr; vpos += tr; if( pos == event_avail_space ) { - next = refer_events_buffer( dbp->fd, info->next_buffer_file_offset ); + next = refer_events_buffer( dbp, info->next_buffer_file_offset ); if( NULL == next ) { fprintf(stderr, "Info entry %d is broken. Only %d entries read from '%s'\n", dbp->nb_infos - nb, nb, dbp->filename); @@ -623,7 +834,7 @@ static void read_infos(dbp_file_t *dbp, parsec_profiling_binary_file_header_t *h nb++; if( (nb < dbp->nb_infos) && (nbthis == info->this_buffer.nb_infos) ) { - next = refer_events_buffer( dbp->fd, info->next_buffer_file_offset ); + next = refer_events_buffer( dbp, info->next_buffer_file_offset ); if( NULL == next ) { fprintf(stderr, "Info entry %d is broken. Only %d entries read from '%s'\n", dbp->nb_infos - nb, nb, dbp->filename); @@ -642,7 +853,7 @@ static void read_infos(dbp_file_t *dbp, parsec_profiling_binary_file_header_t *h release_events_buffer( info ); } -static int read_dictionary(dbp_file_t *file, int fd, const parsec_profiling_binary_file_header_t *head) +static int read_dictionary(dbp_file_t *file, const parsec_profiling_binary_file_header_t *head) { parsec_profiling_buffer_t *dico, *next; parsec_profiling_key_buffer_t *a; @@ -650,7 +861,7 @@ static int read_dictionary(dbp_file_t *file, int fd, const parsec_profiling_bina dbp_multifile_reader_t *dbp = file->parent; /* Dictionaries match: take the first in memory */ - dico = refer_events_buffer( fd, head->dictionary_offset ); + dico = refer_events_buffer( file, head->dictionary_offset ); if( NULL == dico ) { fprintf(stderr, "Unable to read entire dictionary entry at offset %"PRId64"\n", head->dictionary_offset); @@ -698,7 +909,7 @@ static int read_dictionary(dbp_file_t *file, int fd, const 
parsec_profiling_bina nbthis--; if( nb < file->nb_dico_map && nbthis == 0 ) { - next = refer_events_buffer( fd, dico->next_buffer_file_offset ); + next = refer_events_buffer( file, dico->next_buffer_file_offset ); if( NULL == next ) { fprintf(stderr, "Dictionary entry %d is broken. Dictionary broken.\n", nb); release_events_buffer( dico ); @@ -746,6 +957,7 @@ static int read_threads(dbp_file_t *dbp, const parsec_profiling_binary_file_head parsec_profiling_stream_t *res; parsec_profiling_stream_buffer_t *br; parsec_profiling_buffer_t *b, *next; + dbp_thread_t *thr; int nb, nbthis, pos; dbp->nb_threads = head->nb_threads; @@ -753,7 +965,7 @@ static int read_threads(dbp_file_t *dbp, const parsec_profiling_binary_file_head pos = 0; nb = head->nb_threads; - b = refer_events_buffer(dbp->fd, head->thread_offset); + b = refer_events_buffer(dbp, head->thread_offset); nbthis = b->this_buffer.nb_threads; while( nb > 0 ) { assert(PROFILING_BUFFER_TYPE_THREAD == b->buffer_type); @@ -766,16 +978,20 @@ static int read_threads(dbp_file_t *dbp, const parsec_profiling_binary_file_head res->hr_id = (char*)malloc(128); strncpy(res->hr_id, br->hr_id, 128); res->first_events_buffer_offset = br->first_events_buffer_offset; - res->current_events_buffer = refer_events_buffer(dbp->fd, br->first_events_buffer_offset); + res->current_events_buffer = refer_events_buffer(dbp, br->first_events_buffer_offset); PARSEC_OBJ_CONSTRUCT( res, parsec_list_item_t ); - dbp->threads[head->nb_threads - nb].file = dbp; - dbp->threads[head->nb_threads - nb].profile = res; + thr = &dbp->threads[head->nb_threads - nb]; + thr->file = dbp; + thr->profile = res; + pthread_mutex_init(&thr->cache.mtx, NULL); + thr->cache.keys = (event_cache_key_t*)calloc( + dbp_file_nb_dictionary_entries(dbp), sizeof(event_cache_key_t)); + thr->cache.done = 0; pos += sizeof(parsec_profiling_stream_buffer_t) - sizeof(parsec_profiling_info_buffer_t); - pos += read_thread_infos( res, &dbp->threads[head->nb_threads-nb], - br->nb_infos, 
(char*)br->infos ); + pos += read_thread_infos( res, thr, br->nb_infos, (char*)br->infos ); nbthis--; nb--; @@ -783,7 +999,7 @@ static int read_threads(dbp_file_t *dbp, const parsec_profiling_binary_file_head if( nbthis == 0 && nb > 0 ) { assert( b->next_buffer_file_offset != -1 ); - next = refer_events_buffer(dbp->fd, b->next_buffer_file_offset); + next = refer_events_buffer(dbp, b->next_buffer_file_offset); if( NULL == next ) { fprintf(stderr, "Unable to read thread entry %d/%d at offset %lx: Profile file broken\n", head->nb_threads-nb, head->nb_threads, (unsigned long)b->next_buffer_file_offset); @@ -830,6 +1046,7 @@ static dbp_multifile_reader_t *open_files(int nbfiles, char **filenames) dbp->files[n].error = -UNABLE_TO_OPEN; continue; } + dbp->files[n].filename = strdup(filenames[i]); dbp->files[n].parent = dbp; dbp->files[n].fd = fd; @@ -879,20 +1096,12 @@ static dbp_multifile_reader_t *open_files(int nbfiles, char **filenames) } } - if( head.profile_buffer_size != event_buffer_size ) { - fprintf(stderr, "The profile in file %s has a buffer size of %d, which is not compatible with the buffer size %d of file %s. File ignored.\n", - dbp->files[n].filename, head.profile_buffer_size, - event_buffer_size, dbp->files[0].filename); - dbp->files[n].error = -DIFF_BUFFER_SIZE; - goto close_and_continue; - } - dbp->files[n].hr_id = strdup(head.hr_id); dbp->files[n].rank = head.rank; read_infos(&dbp->files[n], &head /*dbp->header*/); - if( read_dictionary(&dbp->files[n], fd, &head) != 0 ) { + if( read_dictionary(&dbp->files[n], &head) != 0 ) { fprintf(stderr, "The profile in file %s has a broken dictionary. Trying to use the dictionary of next file. 
Ignoring the file.\n", dbp->files[n].filename); dbp->files[n].error = -DICT_BROKEN; diff --git a/tools/profiling/dbpreader.h b/tools/profiling/dbpreader.h index 46efdf724..f64dba877 100644 --- a/tools/profiling/dbpreader.h +++ b/tools/profiling/dbpreader.h @@ -71,8 +71,8 @@ const dbp_event_t *dbp_iterator_current(dbp_event_iterator_t *it); const dbp_event_t *dbp_iterator_first(dbp_event_iterator_t *it); const dbp_event_t *dbp_iterator_next(dbp_event_iterator_t *it); void dbp_iterator_delete(dbp_event_iterator_t *it); -int dbp_iterator_move_to_matching_event(dbp_event_iterator_t *pos, const dbp_event_t *ref, int start); -dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(const dbp_event_iterator_t *pos, int start); +int dbp_iterator_move_to_matching_event(dbp_event_iterator_t *pos, const dbp_event_t *ref); +dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(const dbp_event_iterator_t *pos); const dbp_thread_t *dbp_iterator_thread(const dbp_event_iterator_t *it); int dbp_event_get_key(const dbp_event_t *e); diff --git a/tools/profiling/python/pbt2ptt.pxd b/tools/profiling/python/pbt2ptt.pxd index 64bedc8fa..df4f7c245 100644 --- a/tools/profiling/python/pbt2ptt.pxd +++ b/tools/profiling/python/pbt2ptt.pxd @@ -78,7 +78,7 @@ cdef extern from "dbpreader.h": dbp_event_t *dbp_iterator_next(dbp_event_iterator_t *it) void dbp_iterator_delete(dbp_event_iterator_t *it) int dbp_iterator_move_to_matching_event(dbp_event_iterator_t *pos, dbp_event_t *ref) - dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(dbp_event_iterator_t *pos, int start) + dbp_event_iterator_t *dbp_iterator_find_matching_event_all_threads(dbp_event_iterator_t *pos) dbp_thread_t *dbp_iterator_thread(dbp_event_iterator_t *it) int dbp_event_get_key(dbp_event_t *e) diff --git a/tools/profiling/python/pbt2ptt.pyx b/tools/profiling/python/pbt2ptt.pyx index b7f6b5e51..99ff662c5 100644 --- a/tools/profiling/python/pbt2ptt.pyx +++ b/tools/profiling/python/pbt2ptt.pyx 
@@ -613,7 +613,7 @@ cdef construct_stream(builder, skeleton_only, dbp_multifile_reader_t * dbp, dbp_ traceback.print_exc() print('Failed to extract info from the start event (taskpool_id {0} event_id {1})'.format(taskpool_id, event_id)) - it_e = dbp_iterator_find_matching_event_all_threads(it_s, 0) + it_e = dbp_iterator_find_matching_event_all_threads(it_s) if it_e != NULL: event_e = dbp_iterator_current(it_e) From 32d7bac95d83da8060b742e41e4bb28ebd4c33c0 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 8 Jun 2022 17:53:52 -0400 Subject: [PATCH 094/215] Fix the event reader. Signed-off-by: George Bosilca --- tools/profiling/dbpreader.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/profiling/dbpreader.c b/tools/profiling/dbpreader.c index 5932765e7..dbac668eb 100644 --- a/tools/profiling/dbpreader.c +++ b/tools/profiling/dbpreader.c @@ -408,8 +408,6 @@ static const dbp_event_t *dbp_iterator_next_in_buffer(dbp_event_iterator_t *it) const dbp_event_t *dbp_iterator_next(dbp_event_iterator_t *it) { - size_t elen; - if( NULL == it->current_event.native ) return NULL; assert( it->current_events_buffer->buffer_type == PROFILING_BUFFER_TYPE_EVENTS ); @@ -548,8 +546,9 @@ static int bsearch_compare(const void *key, const void *el) return 0; } -static const event_cache_item_t *dbp_event_find_in_cache(dbp_thread_t *thr, - const dbp_event_t *ref) +static const event_cache_item_t* +dbp_event_find_in_cache(const dbp_thread_t *thr, + const dbp_event_t *ref) { event_cache_key_t *cache_key; bsearch_key_t bsearch_key = { ref, NULL }; @@ -577,12 +576,13 @@ int dbp_iterator_move_to_matching_event(dbp_event_iterator_t *pos, const event_cache_item_t *cache_item; const event_cache_key_t *cache_key; const dbp_event_t *e; + const dbp_thread_t *thr = pos->thread; - cache_item = dbp_event_find_in_cache( pos->thread, ref ); + cache_item = dbp_event_find_in_cache( thr, ref ); cache_key = &thr->cache.keys[BASE_KEY(dbp_event_get_key(ref))]; - 
assert(&cache_key->items[0] <= cache_item) - assert(&cache_key->items[cache_key->len] > cache_item) + assert(&cache_key->items[0] <= cache_item); + assert(&cache_key->items[cache_key->len] > cache_item); /* iterate over all cached buffers containing possible matches */ while( cache_item < &cache_key->items[cache_key->len] ) { From e47b86b74bb23b8b5046ff5c42ad7ded30c9c477 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Sat, 7 May 2022 10:44:27 -0400 Subject: [PATCH 095/215] Hash table: count used buckets only when needed The used-bucket count is used to remove old tables from the search path once they are empty. However, keeping the count for the current head requires an atomic operation for each insert and removal for a value that is otherwise unused while the table is the current head. Instead, count the number of used buckets during resize so that we can remove the table once it is empty. Signed-off-by: Joseph Schuchart --- parsec/class/parsec_hash_table.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/parsec/class/parsec_hash_table.c b/parsec/class/parsec_hash_table.c index 252943f36..9e7e100aa 100644 --- a/parsec/class/parsec_hash_table.c +++ b/parsec/class/parsec_hash_table.c @@ -252,15 +252,25 @@ static void parsec_hash_table_resize(parsec_hash_table_t *ht) { parsec_atomic_lock_t unlocked = PARSEC_ATOMIC_UNLOCKED; parsec_hash_table_head_t *head; - int nb_bits = ht->rw_hash->nb_bits + 1; + parsec_hash_table_head_t *old_head = ht->rw_hash; + int nb_bits = old_head->nb_bits + 1; assert(nb_bits < 32); + /* count the number of used buckets */ + int32_t used_buckets = 0; + for (size_t i = 0; i < (1ULL << old_head->nb_bits); ++i) { + if (NULL != old_head->buckets[i].first_item) { + ++used_buckets; + } + } + old_head->used_buckets = used_buckets; + head = malloc(sizeof(parsec_hash_table_head_t)); head->buckets = malloc((1ULL<nb_bits = nb_bits; head->used_buckets = 0; - head->next = ht->rw_hash; - 
head->next_to_free = ht->rw_hash; + head->next = old_head; + head->next_to_free = old_head; ht->rw_hash = head; for( size_t i = 0; i < (1ULL<key; - int res; hash = parsec_hash_table_universal_rehash(ht->key_functions.key_hash(key, ht->hash_data), ht->rw_hash->nb_bits); item->next_item = ht->rw_hash->buckets[hash].first_item; item->hash64 = ht->key_functions.key_hash(key, ht->hash_data); ht->rw_hash->buckets[hash].first_item = item; - res = ht->rw_hash->buckets[hash].cur_len++; - if( 0 == res ) { - parsec_atomic_fetch_inc_int32(&ht->rw_hash->used_buckets); - } + ht->rw_hash->buckets[hash].cur_len++; #if defined(PARSEC_DEBUG_NOISIER) { char estr[64]; @@ -503,7 +509,6 @@ void *parsec_hash_table_nolock_remove(parsec_hash_table_t *ht, parsec_key_t key) parsec_hash_table_item_t *current_item, *prev_item; uint64_t hash64 = ht->key_functions.key_hash(key, ht->hash_data); uint64_t hash = parsec_hash_table_universal_rehash(hash64, ht->rw_hash->nb_bits); - int32_t res; prev_item = NULL; for(current_item = ht->rw_hash->buckets[hash].first_item; NULL != current_item; @@ -514,10 +519,7 @@ void *parsec_hash_table_nolock_remove(parsec_hash_table_t *ht, parsec_key_t key) } else { prev_item->next_item = current_item->next_item; } - res = --(ht->rw_hash->buckets[hash].cur_len); - if( 0 == res ) { - res = parsec_atomic_fetch_dec_int32(&ht->rw_hash->used_buckets); - } + --(ht->rw_hash->buckets[hash].cur_len); #if defined(PARSEC_DEBUG_NOISIER) char estr[64]; PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Removed item %p/%s from hash table %p in bucket %d", From 3ec50bac402acea5a652b29a90810d1e7ab37908 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Fri, 10 Jun 2022 16:15:28 -0400 Subject: [PATCH 096/215] Print the debug rank from device_show_statistics if the rank cannot be found from the context (most common case when this is printed out from parsec-finalize) Signed-off-by: Aurelien Bouteiller --- parsec/mca/device/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 58e6fcfa7..65c33e5ff 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -339,7 +339,7 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex printf("--------------------------------------------------------------------------------------------------\n"); printf("| | | Data In | Data Out |\n"); printf("|Rank %3d | # KERNEL | %% | Required | Transfered(%%) | Required | Transfered(%%) |\n", - (NULL == parsec_context ? -1 : parsec_context->my_rank)); + (NULL == parsec_context ? parsec_debug_rank : parsec_context->my_rank)); printf("|---------|-----------|--------|------------|-------------------|------------|-------------------|\n"); for( i = 0; i < parsec_nb_devices; i++ ) { if( NULL == (device = parsec_devices[i]) ) continue; From f8d4086e4746bc2f3d2f5ea7f9dbd0c45241144f Mon Sep 17 00:00:00 2001 From: Thomas Heraul Date: Wed, 16 Mar 2022 16:24:47 -0400 Subject: [PATCH 097/215] Handle more gracefully cases of error in CUDA module init --- parsec/mca/device/cuda/device_cuda_module.c | 24 ++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index fd7245da6..d2ff0110d 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -368,25 +368,29 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) cuda_device->minor = (uint8_t)minor; len = asprintf(&gpu_device->super.name, "%s (%d)", szName, dev_id); if(-1 == len) - gpu_device->super.name = ""; + gpu_device->super.name = strdup(""); gpu_device->data_avail_epoch = 0; - gpu_device->max_exec_streams = PARSEC_MAX_STREAMS; + gpu_device->max_exec_streams = 0; // We will increment this as streams are succesfully initialized gpu_device->exec_stream = - (parsec_gpu_exec_stream_t**)malloc(gpu_device->max_exec_streams * 
sizeof(parsec_gpu_exec_stream_t*)); + (parsec_gpu_exec_stream_t**)malloc(PARSEC_MAX_STREAMS * sizeof(parsec_gpu_exec_stream_t*)); // To reduce the number of separate malloc, we allocate all the streams in a single block, stored in exec_stream[0] // Because the gpu_device structure does not know the size of cuda_stream or other GPU streams, it needs to keep // separate pointers for the beginning of each exec_stream - gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)malloc(gpu_device->max_exec_streams * sizeof - (parsec_cuda_exec_stream_t)); - for( j = 1; j < gpu_device->max_exec_streams; j++ ) { + // We use calloc because we need some fields to be zero-initialized to ensure graceful handling of errors + gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)calloc(PARSEC_MAX_STREAMS, + sizeof(parsec_cuda_exec_stream_t)); + for( j = 1; j < PARSEC_MAX_STREAMS; j++ ) { gpu_device->exec_stream[j] = (parsec_gpu_exec_stream_t*)( (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[0] + j); } - for( j = 0; j < gpu_device->max_exec_streams; j++ ) { + for( j = 0; j < PARSEC_MAX_STREAMS; j++ ) { parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; parsec_gpu_exec_stream_t* exec_stream = &cuda_stream->super; + /* We will have to release up to this stream in case of error */ + gpu_device->max_exec_streams++; + /* Allocate the stream */ cudastatus = cudaStreamCreate( &(cuda_stream->cuda_stream) ); PARSEC_CUDA_CHECK_ERROR( "cudaStreamCreate ", cudastatus, @@ -415,15 +419,15 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) if(j == 0) { len = asprintf(&exec_stream->name, "h2d(%d)", j); if(-1 == len) - exec_stream->name = "h2d"; + exec_stream->name = strdup("h2d"); } else if(j == 1) { len = asprintf(&exec_stream->name, "d2h(%d)", j); if(-1 == len) - exec_stream->name = "d2h"; + exec_stream->name = strdup("d2h"); } else { len = asprintf(&exec_stream->name, "cuda(%d)", j); if(-1 == len) - exec_stream->name = 
"cuda"; + exec_stream->name = strdup("cuda"); } #if defined(PARSEC_PROF_TRACE) /* Each 'exec' stream gets its own profiling stream, except IN and OUT stream that share it. From e7c6c764ca968352a3664bbb8c2ee2be65de7288 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Thu, 9 Jun 2022 15:57:08 -0400 Subject: [PATCH 098/215] Configurable max_streams, handle more error cases during cuda init Signed-off-by: Aurelien Bouteiller --- .../mca/device/cuda/device_cuda_component.c | 7 +++- parsec/mca/device/cuda/device_cuda_internal.h | 4 +- parsec/mca/device/cuda/device_cuda_module.c | 38 +++++++++---------- parsec/mca/device/device_gpu.c | 12 +++--- parsec/mca/device/device_gpu.h | 3 +- tests/dsl/dtd/dtd_test_cuda_task_insert.c | 4 +- 6 files changed, 33 insertions(+), 35 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 9d71635e0..05c78747b 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2020 The University of Tennessee and The University + * Copyright (c) 2010-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
*/ @@ -37,7 +37,7 @@ static int device_cuda_component_register(void); /* mca params */ int parsec_device_cuda_enabled_index, parsec_device_cuda_enabled; -int parsec_cuda_sort_pending = 0; +int parsec_cuda_sort_pending = 0, parsec_cuda_max_streams = PARSEC_GPU_MAX_STREAMS; int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; char* parsec_cuda_lib_path = NULL; int parsec_cuda_migrate_tasks = 0; @@ -192,6 +192,9 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "max_number_of_ejected_data", "Sets up the maximum number of blocks that can be ejected from GPU memory", false, false, MAX_PARAM_COUNT, &parsec_gpu_d2h_max_flows); + (void)parsec_mca_param_reg_int_name("device_cuda", "max_streams", + "Maximum number of Streams to use for the GPU engine; 2 streams are used for communication between host and device, so the minimum is 3", + false, false, PARSEC_GPU_MAX_STREAMS, &parsec_cuda_max_streams); (void)parsec_mca_param_reg_int_name("device_cuda", "sort_pending_tasks", "Boolean to let the GPU engine sort the first pending tasks stored in the list", false, false, 0, &parsec_cuda_sort_pending); diff --git a/parsec/mca/device/cuda/device_cuda_internal.h b/parsec/mca/device/cuda/device_cuda_internal.h index 351ab52ce..a3643c968 100644 --- a/parsec/mca/device/cuda/device_cuda_internal.h +++ b/parsec/mca/device/cuda/device_cuda_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2020 The University of Tennessee and The University + * Copyright (c) 2010-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
*/ @@ -15,7 +15,7 @@ BEGIN_C_DECLS /* From MCA parameters */ extern int parsec_device_cuda_enabled_index, parsec_device_cuda_enabled; -extern int parsec_cuda_sort_pending; +extern int parsec_cuda_sort_pending, parsec_cuda_max_streams; extern int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; extern char* parsec_cuda_lib_path; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d2ff0110d..b9098c32a 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -359,6 +359,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) streaming_multiprocessor = prop.multiProcessorCount; computemode = prop.computeMode; + // We use calloc because we need some fields to be zero-initialized to ensure graceful handling of errors cuda_device = (parsec_device_cuda_module_t*)calloc(1, sizeof(parsec_device_cuda_module_t)); gpu_device = &cuda_device->super; device = &gpu_device->super; @@ -366,30 +367,29 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) cuda_device->cuda_index = (uint8_t)dev_id; cuda_device->major = (uint8_t)major; cuda_device->minor = (uint8_t)minor; - len = asprintf(&gpu_device->super.name, "%s (%d)", szName, dev_id); - if(-1 == len) - gpu_device->super.name = strdup(""); + len = asprintf(&gpu_device->super.name, "%s: cuda(%d)", szName, dev_id); + if(-1 == len) { gpu_device->super.name = NULL; goto release_device; } gpu_device->data_avail_epoch = 0; - gpu_device->max_exec_streams = 0; // We will increment this as streams are succesfully initialized + gpu_device->max_exec_streams = parsec_cuda_max_streams; gpu_device->exec_stream = - (parsec_gpu_exec_stream_t**)malloc(PARSEC_MAX_STREAMS * sizeof(parsec_gpu_exec_stream_t*)); + (parsec_gpu_exec_stream_t**)malloc(gpu_device->max_exec_streams * sizeof(parsec_gpu_exec_stream_t*)); // To reduce the number of separate malloc, 
we allocate all the streams in a single block, stored in exec_stream[0] // Because the gpu_device structure does not know the size of cuda_stream or other GPU streams, it needs to keep // separate pointers for the beginning of each exec_stream // We use calloc because we need some fields to be zero-initialized to ensure graceful handling of errors - gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)calloc(PARSEC_MAX_STREAMS, - sizeof(parsec_cuda_exec_stream_t)); - for( j = 1; j < PARSEC_MAX_STREAMS; j++ ) { + gpu_device->exec_stream[0] = (parsec_gpu_exec_stream_t*)calloc(gpu_device->max_exec_streams, + sizeof(parsec_cuda_exec_stream_t)); + for( j = 1; j < gpu_device->max_exec_streams; j++ ) { gpu_device->exec_stream[j] = (parsec_gpu_exec_stream_t*)( (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[0] + j); } - for( j = 0; j < PARSEC_MAX_STREAMS; j++ ) { + for( j = 0; j < gpu_device->max_exec_streams; j++ ) { parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; parsec_gpu_exec_stream_t* exec_stream = &cuda_stream->super; - /* We will have to release up to this stream in case of error */ - gpu_device->max_exec_streams++; + /* We will have to release up to this stream in case of error */ + gpu_device->num_exec_streams++; /* Allocate the stream */ cudastatus = cudaStreamCreate( &(cuda_stream->cuda_stream) ); @@ -417,18 +417,13 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) {goto release_device;} ); } if(j == 0) { - len = asprintf(&exec_stream->name, "h2d(%d)", j); - if(-1 == len) - exec_stream->name = strdup("h2d"); + len = asprintf(&exec_stream->name, "h2d_cuda(%d)", j); } else if(j == 1) { - len = asprintf(&exec_stream->name, "d2h(%d)", j); - if(-1 == len) - exec_stream->name = strdup("d2h"); + len = asprintf(&exec_stream->name, "d2h_cuda(%d)", j); } else { len = asprintf(&exec_stream->name, "cuda(%d)", j); - if(-1 == len) - exec_stream->name = strdup("cuda"); } + if(-1 == len) { 
exec_stream->name = NULL; goto release_device; } #if defined(PARSEC_PROF_TRACE) /* Each 'exec' stream gets its own profiling stream, except IN and OUT stream that share it. * It's good to separate the exec streams to know what was submitted to what stream @@ -518,6 +513,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) if( NULL != gpu_device->exec_stream) { for( j = 0; j < gpu_device->max_exec_streams; j++ ) { parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; + if(NULL == cuda_stream) continue; parsec_gpu_exec_stream_t* exec_stream = &cuda_stream->super; if( NULL != exec_stream->fifo_pending ) { @@ -574,7 +570,7 @@ parsec_cuda_module_fini(parsec_device_module_t* device) PARSEC_OBJ_DESTRUCT(&gpu_device->pending); /* Release all streams */ - for( j = 0; j < gpu_device->max_exec_streams; j++ ) { + for( j = 0; j < gpu_device->num_exec_streams; j++ ) { parsec_cuda_exec_stream_t* cuda_stream = (parsec_cuda_exec_stream_t*)gpu_device->exec_stream[j]; parsec_gpu_exec_stream_t* exec_stream = &cuda_stream->super; @@ -2825,7 +2821,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task = progress_task; /* Stage-in completed for this task: it is ready to be executed */ - exec_stream = (exec_stream + 1) % (gpu_device->max_exec_streams - 2); /* Choose an exec_stream */ + exec_stream = (exec_stream + 1) % (gpu_device->num_exec_streams - 2); /* Choose an exec_stream */ if( NULL != gpu_task ) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tExecute %s priority %d", gpu_device->super.name, parsec_task_snprintf(tmp, MAX_TASK_STRLEN, gpu_task->ec), diff --git a/parsec/mca/device/device_gpu.c b/parsec/mca/device/device_gpu.c index 96e4ad0ab..ec2059c47 100644 --- a/parsec/mca/device/device_gpu.c +++ b/parsec/mca/device/device_gpu.c @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2021 The University of Tennessee and The University + * Copyright (c) 2021-2022 The University of Tennessee and 
The University * of Tennessee Research Foundation. All rights * reserved. */ @@ -9,8 +9,6 @@ #include "parsec/mca/device/device.h" #include "parsec/mca/device/device_gpu.h" #include "parsec/utils/zone_malloc.h" -#include "parsec/utils/mca_param.h" -#include "parsec/mca/mca_repository.h" #include "parsec/constants.h" #include "parsec/utils/debug.h" #include "parsec/execution_stream.h" @@ -219,7 +217,7 @@ int parsec_gpu_free_workspace(parsec_device_gpu_module_t * gpu_device) (void)gpu_device; #if !defined(PARSEC_GPU_ALLOC_PER_TILE) int i, j; - for( i = 0; i < gpu_device->max_exec_streams; i++ ) { + for( i = 0; i < gpu_device->num_exec_streams; i++ ) { parsec_gpu_exec_stream_t *gpu_stream = gpu_device->exec_stream[i]; if (gpu_stream->workspace != NULL) { for (j = 0; j < gpu_stream->workspace->total_workspace; j++) { @@ -310,13 +308,13 @@ void dump_GPU_state(parsec_device_gpu_module_t* gpu_device) parsec_output(parsec_gpu_output_stream, "\n\n"); parsec_output(parsec_gpu_output_stream, "Device %d:%d (%p) epoch\n", gpu_device->super.device_index, gpu_device->super.device_index, gpu_device, gpu_device->data_avail_epoch); - parsec_output(parsec_gpu_output_stream, "\tpeer mask %x executed tasks %llu max streams %d\n", - gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->max_exec_streams); + parsec_output(parsec_gpu_output_stream, "\tpeer mask %x executed tasks with %llu streams %d\n", + gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->num_exec_streams); parsec_output(parsec_gpu_output_stream, "\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu]\n", (unsigned long long)gpu_device->super.transferred_data_in, (unsigned long long)gpu_device->super.d2d_transfer, (unsigned long long)gpu_device->super.transferred_data_out, (unsigned long long)gpu_device->super.required_data_in, (unsigned long long)gpu_device->super.required_data_out); - 
for( i = 0; i < gpu_device->max_exec_streams; i++ ) { + for( i = 0; i < gpu_device->num_exec_streams; i++ ) { dump_exec_stream(gpu_device->exec_stream[i]); } if( !parsec_list_is_empty(&gpu_device->gpu_mem_lru) ) { diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 7876912e9..6c822dac0 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -18,7 +18,7 @@ BEGIN_C_DECLS #define PARSEC_GPU_USE_PRIORITIES 1 -#define PARSEC_MAX_STREAMS 6 +#define PARSEC_GPU_MAX_STREAMS 6 #define PARSEC_MAX_EVENTS_PER_STREAM 4 #define PARSEC_GPU_MAX_WORKSPACE 2 @@ -118,6 +118,7 @@ struct parsec_gpu_task_s { struct parsec_device_gpu_module_s { parsec_device_module_t super; uint8_t max_exec_streams; + uint8_t num_exec_streams; int16_t peer_access_mask; /**< A bit set to 1 represent the capability of * the device to access directly the memory of * the index of the set bit device. diff --git a/tests/dsl/dtd/dtd_test_cuda_task_insert.c b/tests/dsl/dtd/dtd_test_cuda_task_insert.c index 6c5977f19..49f70de9b 100644 --- a/tests/dsl/dtd/dtd_test_cuda_task_insert.c +++ b/tests/dsl/dtd/dtd_test_cuda_task_insert.c @@ -56,8 +56,8 @@ int print_cuda_info_task(parsec_device_cuda_module_t *cuda_device, printf(" CUDA device compute capability: %d.%d\n", cuda_device->major, cuda_device->minor); printf( " CUDA Stream: %p\n", (void*)(uintptr_t)cuda_stream->cuda_stream); - printf(" CUDA device max exec stream: %d\n", - cuda_device->super.max_exec_streams); + printf(" CUDA device num exec stream: %d\n", + cuda_device->super.num_exec_streams); return PARSEC_HOOK_RETURN_DONE; } From 8c0da3304b0d1bb8a9c6e5caa1195dc33c948efd Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 17 Jun 2022 17:24:49 -0400 Subject: [PATCH 099/215] Update to a newer spack compiler Correctly set the environment variables before cmake invocation Signed-off-by: George Bosilca --- .github/CI/github_runner.yaml | 4 ++-- .github/workflows/build_cmake.yml | 3 ++- 2 files changed, 
4 insertions(+), 3 deletions(-) diff --git a/.github/CI/github_runner.yaml b/.github/CI/github_runner.yaml index afee052f3..f83eab9b2 100644 --- a/.github/CI/github_runner.yaml +++ b/.github/CI/github_runner.yaml @@ -1,7 +1,7 @@ spack: definitions: - pkgs: - - gcc@11.2.0 + - gcc@12.1.0 - git - patch - flex @@ -11,7 +11,7 @@ spack: - py-cython - cmake - ninja - - otf2 + - otf2@2.3 - openmpi view: true diff --git a/.github/workflows/build_cmake.yml b/.github/workflows/build_cmake.yml index efbd73eb6..4b31931fe 100644 --- a/.github/workflows/build_cmake.yml +++ b/.github/workflows/build_cmake.yml @@ -60,7 +60,8 @@ jobs: # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: | source ${{github.workspace}}/.github/CI/spack_setup.sh - cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG CC=gcc CXX=g++ + echo CC=gcc CXX=g++ FC=gfortran cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG + CC=gcc CXX=g++ FC=gfortran cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE $BUILD_CONFIG - name: Build working-directory: ${{ env.BUILD_DIRECTORY }} From 2f54e3e241de81c23dcc90a050eb4b023dc98a10 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Fri, 10 Jun 2022 18:38:17 -0400 Subject: [PATCH 100/215] Make the PUSHOUT and other DTD GPU concepts generic so that we can compile w/o cuda (and with rocm, when rocm PR merge) Signed-off-by: Aurelien Bouteiller --- parsec/interfaces/dtd/insert_function.c | 52 ++++++++----------- parsec/interfaces/dtd/insert_function.h | 15 ------ .../interfaces/dtd/insert_function_internal.h | 5 +- tests/dsl/dtd/dtd_bench_simple_gemm.c | 48 +++++++++-------- 4 files changed, 51 insertions(+), 69 deletions(-) diff --git a/parsec/interfaces/dtd/insert_function.c b/parsec/interfaces/dtd/insert_function.c index 8660fd87b..999f6a074 100644 --- a/parsec/interfaces/dtd/insert_function.c +++ b/parsec/interfaces/dtd/insert_function.c @@ -1140,10 +1140,8 @@ 
parsec_dtd_tile_of(parsec_data_collection_t *dc, parsec_data_key_t key) tile->flushed = NOT_FLUSHED; if( tile->rank == (int)dc->myrank ) { tile->data_copy = (dc->data_of_key(dc, tile->key))->device_copies[0]; -#if defined(PARSEC_HAVE_CUDA) assert(NULL != tile->data_copy); tile->data_copy->readers = 0; -#endif } else { tile->data_copy = NULL; } @@ -1354,11 +1352,9 @@ parsec_dtd_startup(parsec_context_t *context, parsec_device_module_t *device = parsec_mca_device_get(_i); if( NULL == device ) continue; if( !(tp->devices_index_mask & (1 << device->device_index))) continue; /* not supported */ -#if defined(PARSEC_HAVE_CUDA) // If CUDA is enabled, let the CUDA device activated for this // taskpool. if( PARSEC_DEV_CUDA == device->type ) continue; -#endif /* defined(PARSEC_HAVE_CUDA) */ if( NULL != device->taskpool_register ) if( PARSEC_SUCCESS != device->taskpool_register(device, (parsec_taskpool_t *)tp)) { @@ -1721,7 +1717,6 @@ complete_hook_of_dtd(parsec_execution_stream_t *es, for( current_dep = 0; current_dep < this_dtd_task->super.task_class->nb_flows; current_dep++ ) { action_mask |= (1 << current_dep); -#if defined(PARSEC_HAVE_CUDA) // Retrieve data access mode for current flow int op_type_on_current_flow = FLOW_OF(this_dtd_task, current_dep)->op_type; @@ -1746,7 +1741,6 @@ complete_hook_of_dtd(parsec_execution_stream_t *es, /* printf("[complete_hook_of_dtd] %s, data_in readers = %d, data_out readers = %d\n", this_task->task_class->name, data_in->original->device_copies[0]->readers, data_out->original->device_copies[0]->readers); */ } -#endif } this_task->task_class->release_deps(es, this_task, action_mask | @@ -2177,8 +2171,7 @@ parsec_dtd_template_release( const parsec_task_class_t *tc ) } } -#if defined(PARSEC_HAVE_CUDA) -static parsec_hook_return_t parsec_dtd_cuda_task_submit(parsec_execution_stream_t *es, parsec_task_t *this_task) +static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t *es, parsec_task_t *this_task) { (void) es; int 
dev_index; @@ -2197,12 +2190,10 @@ static parsec_hook_return_t parsec_dtd_cuda_task_submit(parsec_execution_stream_ PARSEC_OBJ_CONSTRUCT(gpu_task, parsec_list_item_t); gpu_task->ec = (parsec_task_t *) this_task; - gpu_task->submit = dtd_tc->cuda_func_ptr; + gpu_task->submit = dtd_tc->gpu_func_ptr; gpu_task->task_type = 0; gpu_task->load = ratio * parsec_device_sweight[dev_index]; gpu_task->last_data_check_epoch = -1; /* force at least one validation for the task */ - gpu_task->stage_in = parsec_default_cuda_stage_in; - gpu_task->stage_out = parsec_default_cuda_stage_out; gpu_task->pushout = 0; for(int i = 0; i < dtd_tc->super.nb_flows; i++) { parsec_dtd_flow_info_t *flow = FLOW_OF(dtd_task, i); @@ -2213,9 +2204,20 @@ static parsec_hook_return_t parsec_dtd_cuda_task_submit(parsec_execution_stream_ } parsec_device_load[dev_index] += (float)gpu_task->load; - return parsec_cuda_kernel_scheduler(es, gpu_task, dev_index); -} + parsec_device_module_t *device = parsec_mca_device_get(dev_index); + assert(NULL != device); + switch(device->type) { +#if defined(PARSEC_HAVE_CUDA) + case PARSEC_DEV_CUDA: + gpu_task->stage_in = parsec_default_cuda_stage_in; + gpu_task->stage_out = parsec_default_cuda_stage_out; + return parsec_cuda_kernel_scheduler(es, gpu_task, dev_index); #endif + default: + parsec_fatal("DTD scheduling on device type %d: this is not a valid GPU device type in this build", device->type); + } + return PARSEC_HOOK_RETURN_ERROR; +} int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp, parsec_task_class_t *tc, @@ -2251,13 +2253,11 @@ int parsec_dtd_task_class_add_chore(parsec_taskpool_t *tp, } (*pincarnations)[i].type = device_type; -#if defined(PARSEC_HAVE_CUDA) if(PARSEC_DEV_CUDA == device_type) { - (*pincarnations)[i].hook = parsec_dtd_cuda_task_submit; - dtd_tc->cuda_func_ptr = (parsec_advance_task_function_t)function; - } else -#endif - { + (*pincarnations)[i].hook = parsec_dtd_gpu_task_submit; + dtd_tc->gpu_func_ptr = 
(parsec_advance_task_function_t)function; + } + else { dtd_tc->cpu_func_ptr = function; (*pincarnations)[i].hook = function; // We can directly call the CPU hook, as there is nothing else to do } @@ -3167,14 +3167,12 @@ __parsec_dtd_taskpool_create_task(parsec_taskpool_t *tp, incarnations = (__parsec_chore_t **)&tc->incarnations; (*incarnations)[0].type = device_type; -#if defined(PARSEC_HAVE_CUDA) if( device_type == PARSEC_DEV_CUDA ) { /* Special case for CUDA: we need an intermediate */ - (*incarnations)[0].hook = parsec_dtd_cuda_task_submit; - dtd_tc->cuda_func_ptr = (parsec_advance_task_function_t)fpointer; - } else -#endif - { + (*incarnations)[0].hook = parsec_dtd_gpu_task_submit; + dtd_tc->gpu_func_ptr = (parsec_advance_task_function_t)fpointer; + } + else { /* Default case: the user-provided function is directly the hook to call */ (*incarnations)[0].hook = fpointer; // We can directly call the CPU hook dtd_tc->cpu_func_ptr = fpointer; @@ -3413,8 +3411,6 @@ int parsec_dtd_destroy_arena_datatype(parsec_context_t *ctx, int id) return PARSEC_SUCCESS; } -#if defined(PARSEC_HAVE_CUDA) - /** * Return pointer on the device pointer associated with the i-th flow * of `this_task`. 
@@ -3433,5 +3429,3 @@ parsec_dtd_get_dev_ptr(parsec_task_t *this_task, int i) return dev_ptr; } - -#endif diff --git a/parsec/interfaces/dtd/insert_function.h b/parsec/interfaces/dtd/insert_function.h index 95a929d52..17e49ca3b 100644 --- a/parsec/interfaces/dtd/insert_function.h +++ b/parsec/interfaces/dtd/insert_function.h @@ -14,9 +14,6 @@ #include "parsec/runtime.h" #include "parsec/data_distribution.h" -#if defined(PARSEC_HAVE_CUDA) -#include "parsec/mca/device/cuda/device_cuda.h" -#endif /* defined(PARSEC_HAVE_CUDA) */ BEGIN_C_DECLS @@ -70,10 +67,8 @@ typedef enum { PARSEC_INPUT = 0x100000, PARSEC_GET_OP_TYPE =0xf00000, /* MASK: not an actual value, used to filter the relevant enum values */ PARSEC_AFFINITY =1<<16, /* Data affinity */ PARSEC_DONT_TRACK =1<<17, /* Drop dependency tracking */ -#if defined(PARSEC_HAVE_CUDA) PARSEC_PUSHOUT =1<<18, /* Push GPU data back to CPU */ PARSEC_PULLIN =1<<19, /* Pull data on the CPU */ -#endif PARSEC_GET_OTHER_FLAG_INFO=0xf0000, /* MASK: not an actual value, used to filter the relevant enum values */ PARSEC_GET_REGION_INFO=0xffff /* MASK: not an actual value, used to filter the relevant enum values */ } parsec_dtd_op_t; @@ -177,14 +172,6 @@ typedef struct parsec_dtd_taskpool_s parsec_dtd_taskpool_t; */ typedef parsec_hook_return_t (parsec_dtd_funcptr_t)(parsec_execution_stream_t *, parsec_task_t *); -#if defined(PARSEC_HAVE_CUDA) -// FIXME: The following macro should be removed and is only useful -// during the development of the cuda DTD feature. -#define PARSEC_DTD_HAVE_CUDA -#include -#include -#endif /* defined(PARSEC_HAVE_CUDA) */ - /** * This function is used to retrieve the parameters passed during insertion of a task. * This function takes variadic parameters. @@ -294,14 +281,12 @@ parsec_dtd_insert_task(parsec_taskpool_t *tp, int device_type, const char *name_of_kernel, ...); -#if defined(PARSEC_HAVE_CUDA) /** * Return pointer on the device pointer associated with the i-th flow * of `this_task`. 
**/ void* parsec_dtd_get_dev_ptr(parsec_task_t *this_task, int i); -#endif /** * This function behaves exactly like parsec_dtd_insert_task() diff --git a/parsec/interfaces/dtd/insert_function_internal.h b/parsec/interfaces/dtd/insert_function_internal.h index 6aacf3820..5fce90ef5 100644 --- a/parsec/interfaces/dtd/insert_function_internal.h +++ b/parsec/interfaces/dtd/insert_function_internal.h @@ -19,6 +19,7 @@ #include "parsec/data_distribution.h" #include "parsec/interfaces/dtd/insert_function.h" #include "parsec/execution_stream.h" +#include "parsec/mca/device/device_gpu.h" BEGIN_C_DECLS @@ -252,9 +253,7 @@ struct parsec_dtd_task_class_s { int ref_count; parsec_dtd_param_t *params; parsec_hook_t *cpu_func_ptr; -#if defined(PARSEC_HAVE_CUDA) - parsec_advance_task_function_t cuda_func_ptr; -#endif + parsec_advance_task_function_t gpu_func_ptr; }; typedef int (parsec_dtd_arg_cb)(int first_arg, void *second_arg, int third_arg, void *cb_data); diff --git a/tests/dsl/dtd/dtd_bench_simple_gemm.c b/tests/dsl/dtd/dtd_bench_simple_gemm.c index 938287d11..550109728 100644 --- a/tests/dsl/dtd/dtd_bench_simple_gemm.c +++ b/tests/dsl/dtd/dtd_bench_simple_gemm.c @@ -3,6 +3,10 @@ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" +#if defined(PARSEC_HAVE_CUDA) +#include "parsec/mca/device/cuda/device_cuda.h" +#endif + #include "cublas_v2.h" #if HAVE_BLAS #if BLAS_WITH_ESSL @@ -80,7 +84,7 @@ int initialize_tile(parsec_execution_stream_t *es, parsec_task_t *this_task) } int initialize_matrix(parsec_context_t *parsec_context, int rank, parsec_matrix_block_cyclic_t *mat, unsigned int seed, - const char *name, int *cuda_device_index, int nb_gpus) + const char *name, int *gpu_device_index, int nb_gpus) { parsec_taskpool_t *tp = parsec_dtd_taskpool_new(); @@ -125,11 +129,11 @@ int initialize_matrix(parsec_context_t *parsec_context, int rank, parsec_matrix_ 
if(PARSEC_DEV_CUDA == device && (int)mat->super.super.rank_of_key(&mat->super.super, key) == rank ) { if( verbose ) { - fprintf(stderr, "Advice %s(%d, %d) to prefer CUDA device %d (parsec device %d) of rank %d\n", - name, i, j, g, cuda_device_index[g], (int)mat->super.super.rank_of_key(&mat->super.super, key)); + fprintf(stderr, "Advice %s(%d, %d) to prefer GPU device %d (parsec device %d) of rank %d\n", + name, i, j, g, gpu_device_index[g], (int)mat->super.super.rank_of_key(&mat->super.super, key)); } parsec_advise_data_on_device(mat->super.super.data_of_key(&mat->super.super, key), - cuda_device_index[g], + gpu_device_index[g], PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE); } g = (g + 1) % nb_gpus; @@ -151,7 +155,7 @@ int initialize_matrix(parsec_context_t *parsec_context, int rank, parsec_matrix_ return 0; } -int gemm_kernel_cuda(parsec_device_cuda_module_t *cuda_device, +int gemm_kernel_cuda(parsec_device_gpu_module_t *gpu_device, parsec_gpu_task_t *gpu_task, parsec_gpu_exec_stream_t *gpu_stream) { @@ -166,7 +170,7 @@ int gemm_kernel_cuda(parsec_device_cuda_module_t *cuda_device, double *a_gpu, *b_gpu, *c_gpu; (void)gpu_stream; - (void)cuda_device; + (void)gpu_device; parsec_dtd_unpack_args(this_task, &A, &B, &C, @@ -179,7 +183,7 @@ int gemm_kernel_cuda(parsec_device_cuda_module_t *cuda_device, handle = parsec_info_get(&gpu_stream->infos, CuHI); assert(NULL != handle); - one_device = parsec_info_get(&cuda_device->super.super.infos, Cu1); + one_device = parsec_info_get(&gpu_device->super.infos, Cu1); assert(NULL != one_device); gettimeofday(&start, NULL); @@ -310,7 +314,7 @@ int simple_gemm(parsec_context_t *parsec_context, parsec_matrix_block_cyclic_t * return 0; } -int get_nb_cuda_devices() +int get_nb_gpu_devices() { int nb = 0; @@ -324,7 +328,7 @@ int get_nb_cuda_devices() return nb; } -int *get_cuda_device_index() +int *get_gpu_device_index() { int *dev_index = NULL; @@ -403,7 +407,7 @@ static void *allocate_one_on_device(void *obj, void *p) #endif } -static 
parsec_matrix_block_cyclic_t *create_initialize_matrix(parsec_context_t *parsec_context, int rank, unsigned int seed, const char *name, int mb, int nb, int M, int N, int *cuda_device_index, int nbgpus) +static parsec_matrix_block_cyclic_t *create_initialize_matrix(parsec_context_t *parsec_context, int rank, unsigned int seed, const char *name, int mb, int nb, int M, int N, int *gpu_device_index, int nbgpus) { parsec_matrix_block_cyclic_t *dc; dc = calloc(1, sizeof(parsec_matrix_block_cyclic_t)); @@ -421,7 +425,7 @@ static parsec_matrix_block_cyclic_t *create_initialize_matrix(parsec_context_t * (size_t)dc->super.bsiz * (size_t)parsec_datadist_getsizeoftype(dc->super.mtype)); parsec_dtd_data_collection_init(A); - initialize_matrix(parsec_context, rank, dc, seed, name, cuda_device_index, nbgpus); + initialize_matrix(parsec_context, rank, dc, seed, name, gpu_device_index, nbgpus); return dc; } @@ -595,16 +599,16 @@ int main(int argc, char **argv) int ncores = -1; // Use all available cores parsec_context = parsec_init(ncores, &pargc, &pargv); - int *cuda_device_index = NULL; + int *gpu_device_index = NULL; if( PARSEC_DEV_CUDA == device ) { - nbgpus = get_nb_cuda_devices(); + nbgpus = get_nb_gpu_devices(); rc = !(nbgpus >= 1); if( rc != 0 ) { fprintf(stderr, "Rank %d doesn't have CUDA accelerators\n", rank); MPI_Abort(MPI_COMM_WORLD, 0); return -1; } - cuda_device_index = get_cuda_device_index(); + gpu_device_index = get_gpu_device_index(); // Prepare CUBLAS Handle marshaller CuHI = parsec_info_register(&parsec_per_stream_infos, "CUBLAS::HANDLE", @@ -625,21 +629,21 @@ int main(int argc, char **argv) // Create and initialize the data parsec_matrix_block_cyclic_t *dcA = create_initialize_matrix(parsec_context, rank, 1789, "A", mb, kb, M, K, - cuda_device_index, nbgpus); + gpu_device_index, nbgpus); parsec_matrix_block_cyclic_t *dcB = create_initialize_matrix(parsec_context, rank, 1805, "B", kb, nb, K, N, - cuda_device_index, nbgpus); + gpu_device_index, nbgpus); 
parsec_matrix_block_cyclic_t *dcC = create_initialize_matrix(parsec_context, rank, 1901, "C", mb, nb, M, N, - cuda_device_index, nbgpus); + gpu_device_index, nbgpus); for( int r = 0; r < runs + 1; r++ ) { - double gflop = 2.0 * M * N * K / 1e9; - double maxtime = 0.0; + double gflop = 2.0 * M * N * K / 1e9; + double maxtime = 0.0; if(min_perf > 0.0) maxtime = gflop/world/nbgpus/min_perf; struct timeval start, end, diff; - if(maxtime > 0.0 && maxtime < 60.0) maxtime=60.0; - if(rank == 0 && maxtime > 0.0) fprintf(stderr, "watchdog: %d seconds\n", (int)maxtime); - if(maxtime > 0.0) alarm((int)maxtime); + if(maxtime > 0.0 && maxtime < 60.0) maxtime=60.0; + if(rank == 0 && maxtime > 0.0) fprintf(stderr, "watchdog: %d seconds\n", (int)maxtime); + if(maxtime > 0.0) alarm((int)maxtime); gettimeofday(&start, NULL); simple_gemm(parsec_context, dcA, dcB, dcC); gettimeofday(&end, NULL); From a85baa5ff43559a1c8d807b52352cfa34633089a Mon Sep 17 00:00:00 2001 From: Thomas Herault Date: Fri, 13 May 2022 14:35:23 -0400 Subject: [PATCH 101/215] Fix current CUDA bug... Introduction of the NEW optimization in CUDA introduced this bug that makes the CUDA driver not copy data from RAM to GPU, if the data copy comes from a direct memory access. The test in the code that detects whether this is a NEW tile is wrong, and incorrectly conflates copies coming directly from memory with copies coming from a NEW operation. The proper way to detect that this is NEW data is to check if the data collection is NULL (meaning it's not a direct access from the data collection) AND if the repo_entry is NULL (meaning it doesn't have a predecessor). Tested with DPLASMA dpotrf on leconte.
--- parsec/mca/device/cuda/device_cuda_module.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index b9098c32a..904f13dac 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1500,8 +1500,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, __FILE__, __LINE__); } - /* If data is from NEW, as we skip NULL */ - if( NULL == task_data->source_repo_entry ) + if( NULL == task_data->source_repo_entry && NULL == task_data->data_in->original->dc ) transfer_from = -1; /* Do not need to be tranferred */ From 6288f881601968bb91e53b61378750ce1c75cceb Mon Sep 17 00:00:00 2001 From: Thomas Herault Date: Tue, 21 Jun 2022 16:33:58 -0400 Subject: [PATCH 102/215] Typo in local queues utils that would make Inverse Priority Scheduler behave exactly as Absolute Priority --- parsec/mca/sched/sched_local_queues_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/sched/sched_local_queues_utils.h b/parsec/mca/sched/sched_local_queues_utils.h index 5f9fb8c8e..aba98147d 100644 --- a/parsec/mca/sched/sched_local_queues_utils.h +++ b/parsec/mca/sched/sched_local_queues_utils.h @@ -164,7 +164,7 @@ static inline parsec_task_t *parsec_mca_sched_list_local_counter_pop_front(parse static inline parsec_task_t *parsec_mca_sched_list_local_counter_pop_back(parsec_mca_sched_list_local_counter_t *sl) { parsec_task_t * context = - (parsec_task_t*)parsec_list_pop_front(sl->list); + (parsec_task_t*)parsec_list_pop_back(sl->list); if(NULL != context) sl->local_counter--; return context; From efba073860eb8cde2fc573542e387ce9e1e3c3c1 Mon Sep 17 00:00:00 2001 From: Thomas Herault Date: Thu, 24 Feb 2022 14:36:04 -0500 Subject: [PATCH 103/215] dtd_test_simple_gemm.c relies on non-standard cblas.h file to compute GEMMs on CPUs, if a BLAS library is discovered by CMake. 
Define the prototype of the dgemm call inside the file, as BLAS does not define an include file and, depending on the BLAS library found, this file might exist or not. Also remove a deprecated function call warning that was not captured in previous commits. --- tests/dsl/dtd/CMakeLists.txt | 10 +++---- ...h_simple_gemm.c => dtd_test_simple_gemm.c} | 26 +++++++++++++++---- 2 files changed, 26 insertions(+), 10 deletions(-) rename tests/dsl/dtd/{dtd_bench_simple_gemm.c => dtd_test_simple_gemm.c} (95%) diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index 3b36c5a63..1ce08a6c9 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -29,13 +29,13 @@ parsec_addtest_executable(C dtd_test_ce SOURCES dtd_test_ce.c) if( PARSEC_HAVE_CUDA ) parsec_addtest_executable(C dtd_test_cuda_task_insert SOURCES dtd_test_cuda_task_insert.c) if( TARGET CUDA::cublas ) - parsec_addtest_executable(C dtd_bench_simple_gemm SOURCES dtd_bench_simple_gemm.c) - target_link_libraries(dtd_bench_simple_gemm PRIVATE CUDA::cublas m) + parsec_addtest_executable(C dtd_test_simple_gemm SOURCES dtd_test_simple_gemm.c) + target_link_libraries(dtd_test_simple_gemm PRIVATE CUDA::cublas m) find_package(BLAS) if(BLAS_FOUND) - target_link_libraries(dtd_bench_simple_gemm PRIVATE ${BLAS_LIBRARIES}) - target_include_directories(dtd_bench_simple_gemm PRIVATE ${BLAS_INCLUDE_DIRS}) - target_compile_definitions(dtd_bench_simple_gemm PRIVATE HAVE_BLAS=1) + target_link_libraries(dtd_test_simple_gemm PRIVATE ${BLAS_LIBRARIES}) + target_include_directories(dtd_test_simple_gemm PRIVATE ${BLAS_INCLUDE_DIRS}) + target_compile_definitions(dtd_test_simple_gemm PRIVATE HAVE_BLAS=1) endif(BLAS_FOUND) endif( TARGET CUDA::cublas ) endif() diff --git a/tests/dsl/dtd/dtd_bench_simple_gemm.c b/tests/dsl/dtd/dtd_test_simple_gemm.c similarity index 95% rename from tests/dsl/dtd/dtd_bench_simple_gemm.c rename to tests/dsl/dtd/dtd_test_simple_gemm.c index 550109728..fdb227716 100644 ---
a/tests/dsl/dtd/dtd_bench_simple_gemm.c +++ b/tests/dsl/dtd/dtd_test_simple_gemm.c @@ -3,23 +3,39 @@ #include "parsec/data_dist/matrix/matrix.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include "parsec/interfaces/dtd/insert_function_internal.h" -#if defined(PARSEC_HAVE_CUDA) -#include "parsec/mca/device/cuda/device_cuda.h" -#endif +// The file is not compiled if CUDA is not present or CUBLAS is not found +#include "parsec/mca/device/cuda/device_cuda.h" #include "cublas_v2.h" +<<<<<<< HEAD:tests/dsl/dtd/dtd_bench_simple_gemm.c #if HAVE_BLAS #if BLAS_WITH_ESSL #include "essl.h" #else #include "mkl_cblas.h" #endif +======= + +#if defined(HAVE_BLAS) +// If our CMake finds a BLAS library, it defines HAVE_BLAS +// BLAS does not guarantee there is a cblas.h, we define our own prototype +typedef enum CBLAS_LAYOUT {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; +typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113} CBLAS_TRANSPOSE; +typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; +typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; +typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; +#define CBLAS_INDEX int + +extern void cblas_dgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, + const CBLAS_TRANSPOSE TransB, const CBLAS_INDEX M, const CBLAS_INDEX N, + const CBLAS_INDEX K, const double alpha, const double *A, + const CBLAS_INDEX lda, const double *B, const CBLAS_INDEX ldb, + const double beta, double *C, const CBLAS_INDEX ldc); +>>>>>>> dtd_test_simple_gemm.c relies on non-standard cblas.h file:tests/dsl/dtd/dtd_test_simple_gemm.c #endif #if defined(PARSEC_HAVE_MPI) - #include - #endif /* defined(PARSEC_HAVE_MPI) */ #include From 04a772c8036fee9cca3c8c2dbbb842abe74e441f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 29 Jun 2022 02:50:18 +1000 Subject: [PATCH 104/215] rebased with new master --- tests/dsl/dtd/CMakeLists.txt | 4 ---- 
tests/dsl/dtd/dtd_test_simple_gemm.c | 9 --------- 2 files changed, 13 deletions(-) diff --git a/tests/dsl/dtd/CMakeLists.txt b/tests/dsl/dtd/CMakeLists.txt index 1ce08a6c9..ba06bb8d2 100644 --- a/tests/dsl/dtd/CMakeLists.txt +++ b/tests/dsl/dtd/CMakeLists.txt @@ -21,9 +21,6 @@ parsec_addtest_executable(C dtd_test_global_id_for_dc_assumed SOURCES dtd_test_g parsec_addtest_executable(C dtd_test_explicit_task_creation SOURCES dtd_test_explicit_task_creation.c) parsec_addtest_executable(C dtd_test_tp_enqueue_dequeue SOURCES dtd_test_tp_enqueue_dequeue.c) parsec_addtest_executable(C dtd_test_interleave_actions SOURCES dtd_test_interleave_actions.c) -<<<<<<< HEAD - -======= parsec_addtest_executable(C dtd_test_ce SOURCES dtd_test_ce.c) if( PARSEC_HAVE_CUDA ) @@ -39,4 +36,3 @@ if( PARSEC_HAVE_CUDA ) endif(BLAS_FOUND) endif( TARGET CUDA::cublas ) endif() ->>>>>>> Attempt to backport the revamp of the communication engine (#380) diff --git a/tests/dsl/dtd/dtd_test_simple_gemm.c b/tests/dsl/dtd/dtd_test_simple_gemm.c index fdb227716..1f005cb10 100644 --- a/tests/dsl/dtd/dtd_test_simple_gemm.c +++ b/tests/dsl/dtd/dtd_test_simple_gemm.c @@ -7,14 +7,6 @@ // The file is not compiled if CUDA is not present or CUBLAS is not found #include "parsec/mca/device/cuda/device_cuda.h" #include "cublas_v2.h" -<<<<<<< HEAD:tests/dsl/dtd/dtd_bench_simple_gemm.c -#if HAVE_BLAS -#if BLAS_WITH_ESSL -#include "essl.h" -#else -#include "mkl_cblas.h" -#endif -======= #if defined(HAVE_BLAS) // If our CMake finds a BLAS library, it defines HAVE_BLAS @@ -31,7 +23,6 @@ extern void cblas_dgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, const CBLAS_INDEX K, const double alpha, const double *A, const CBLAS_INDEX lda, const double *B, const CBLAS_INDEX ldb, const double beta, double *C, const CBLAS_INDEX ldc); ->>>>>>> dtd_test_simple_gemm.c relies on non-standard cblas.h file:tests/dsl/dtd/dtd_test_simple_gemm.c #endif #if defined(PARSEC_HAVE_MPI) From 
441ebfb4631b469df4091370f4c68a81feda7bff Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 29 Jun 2022 04:56:35 +1000 Subject: [PATCH 105/215] new debug statement added to verify parsec_data_copy_destruct --- parsec/data.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/parsec/data.c b/parsec/data.c index 8ea700027..b027a71e3 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -43,6 +43,13 @@ static void parsec_data_copy_destruct(parsec_data_copy_t* obj) /* If the copy is still attached to a data we should detach it first */ if( NULL != obj->original) { + + if ( (obj->device_index == 0) && ( (obj->original != NULL && obj->original->device_copies[1] != NULL) || (obj->original != NULL && obj->original->device_copies[2] != NULL) || (obj->original != NULL && obj->original->device_copies[3] != NULL) ) ) + { + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "parsec_data_copy_destruct original %p device_0 %p device_1 %p device_2 %p", + obj->original, obj->original->device_copies[0], obj->original->device_copies[1], obj->original->device_copies[2]); + } + parsec_data_copy_detach(obj->original, obj, obj->device_index); assert( NULL == obj->original ); } From 9b5eb8f803b0c913a82cc96cd948c462ef8431e1 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 29 Jun 2022 08:06:57 +1000 Subject: [PATCH 106/215] new member original_data_in added to keep track of all original data_in. The original data_in is RELEASED after task is completed.
--- parsec/interfaces/ptg/ptg-compiler/jdf2c.c | 2 + parsec/mca/device/cuda/device_cuda_module.c | 84 ++++++++++++++------- parsec/mca/device/device_gpu.h | 2 +- 3 files changed, 61 insertions(+), 27 deletions(-) diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index 431dca4d0..4f333cd9a 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -6843,6 +6843,8 @@ static void jdf_generate_code_hook_cuda(const jdf_t *jdf, coutput(" parsec_device_load[dev_index] += gpu_task->load;\n" " gpu_task->migrate_status = 0;\n" " gpu_task->data_retained = 0;\n" + " int f = 0; \n" + " for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) gpu_task->original_data_in[f] = NULL;" "\n" " return parsec_cuda_kernel_scheduler( es, gpu_task, dev_index );\n" "}\n\n"); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 904f13dac..e3e0170b2 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1413,7 +1413,18 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best possible candidate to to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); - release_after_data_in_is_attached = task_data->data_in; + //release_after_data_in_is_attached = task_data->data_in; + if( gpu_task->original_data_in[ flow->flow_index ] == NULL) + gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + else + { + if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + { + PARSEC_OBJ_RELEASE(task_data->data_in); + PARSEC_OBJ_RETAIN(candidate); + } + + } task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1459,7 +1470,17 @@ parsec_gpu_data_stage_in( 
parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); - release_after_data_in_is_attached = task_data->data_in; + //release_after_data_in_is_attached = task_data->data_in; + if( gpu_task->original_data_in[ flow->flow_index ] == NULL) + gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + else + { + if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + { + PARSEC_OBJ_RELEASE(task_data->data_in); + PARSEC_OBJ_RETAIN(candidate); + } + } task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1501,7 +1522,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } if( NULL == task_data->source_repo_entry && NULL == task_data->data_in->original->dc ) + { transfer_from = -1; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "New tile created original %p "); + } /* Do not need to be tranferred */ if( -1 == transfer_from ) { @@ -1599,14 +1624,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, nb_elts); } parsec_atomic_unlock( &original->lock ); - if( NULL != release_after_data_in_is_attached ) - { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p on device_index %d [readers %d, ref_count %d] at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, - release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - __FILE__, __LINE__); - PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - } + //if( NULL != release_after_data_in_is_attached ) + //{ + // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count 
%d] at %s:%d", + // release_after_data_in_is_attached, release_after_data_in_is_attached->original, + // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + // __FILE__, __LINE__); + // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + //} assert(0); return -1; } @@ -1632,14 +1657,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } gpu_elem->push_task = gpu_task->ec; /* only the task who does the transfer can modify the data status later. */ parsec_atomic_unlock( &original->lock ); - if( NULL != release_after_data_in_is_attached ) - { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at on device_index %d [readers %d, ref_count %d] %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, - release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - __FILE__, __LINE__); - PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - } + //if( NULL != release_after_data_in_is_attached ) + //{ + // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", + // release_after_data_in_is_attached, release_after_data_in_is_attached->original, + // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + // __FILE__, __LINE__); + // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + //} return 1; } @@ -1660,14 +1685,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, nb_elts, in_elem->version, gpu_elem->version); parsec_atomic_unlock( &original->lock ); - if( NULL != release_after_data_in_is_attached ) - { - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to 
original %p on device_index %d [readers %d, ref_count %d] at %s:%d", - release_after_data_in_is_attached, release_after_data_in_is_attached->original, release_after_data_in_is_attached->device_index, - release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - __FILE__, __LINE__); - PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - } + //if( NULL != release_after_data_in_is_attached ) + //{ + // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", + // release_after_data_in_is_attached, release_after_data_in_is_attached->original, + // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, + // __FILE__, __LINE__); + // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); + //} /* TODO: data keeps the same coherence flags as before */ return 0; } @@ -2943,6 +2968,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec = NULL; goto remove_gpu_task; } + + int f = 0; + for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) + { + if( gpu_task->original_data_in[f] != NULL ) + PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } parsec_cuda_kernel_epilog( gpu_device, gpu_task ); __parsec_complete_execution( es, gpu_task->ec ); diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 6c822dac0..5f31156f9 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -86,7 +86,7 @@ struct parsec_gpu_task_s { parsec_stage_out_function_t *stage_out; int migrate_status; int32_t posssible_candidate[MAX_PARAM_COUNT]; - int32_t data_retained; + parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From 3efff9a7fe53b5a87bb839aa18b47aa75a36559c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 28 Jun 2022 18:25:58 -0400 Subject: 
[PATCH 107/215] rebased with branch mca --- parsec/mca/device/device_gpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 5f31156f9..0f316d054 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -87,6 +87,7 @@ struct parsec_gpu_task_s { int migrate_status; int32_t posssible_candidate[MAX_PARAM_COUNT]; parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; + int32_t data_retained; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; From a159f4383d51b5809c601b01e8798348f4b36b8d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 29 Jun 2022 15:30:54 -0400 Subject: [PATCH 108/215] additional debug statements added --- parsec/data.c | 3 +++ parsec/datarepo.c | 32 ++++++++++++++++---------------- parsec/parsec_reshape.c | 7 +++++++ parsec/remote_dep_mpi.c | 11 +++++++++++ 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/parsec/data.c b/parsec/data.c index b027a71e3..2a3858101 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -545,6 +545,9 @@ void parsec_data_copy_release(parsec_data_copy_t* copy) { /* TODO: Move the copy back to the CPU before destroying it */ PARSEC_DATA_COPY_RELEASE(copy); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (parsec_cleanup_reshape_promise )copy %p [ref_count %d] for [original %p] ", + copy, copy->super.super.obj_reference_count, copy->original); } void* parsec_data_copy_get_ptr(parsec_data_copy_t* data) diff --git a/parsec/datarepo.c b/parsec/datarepo.c index 99d9058fd..28faa7162 100644 --- a/parsec/datarepo.c +++ b/parsec/datarepo.c @@ -83,9 +83,9 @@ __data_repo_lookup_entry_and_create(parsec_execution_stream_t *es, data_repo_t * parsec_hash_table_nolock_insert(&repo->table, &e->ht_item); parsec_hash_table_unlock_bucket(&repo->table, key); - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has been allocated with an usage count of %u/%u and is 
retained %d at %s:%d", - e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, e->usagecnt, e->usagelmt, e->retained, file, line); - + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has been allocated with an usage count of %u/%u and is retained %d at %s:%d", + // e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, e->usagecnt, e->usagelmt, e->retained, file, line); +// return e; } @@ -106,23 +106,23 @@ __data_repo_entry_used_once(data_repo_t *repo, parsec_key_t key e = (data_repo_entry_t*)parsec_hash_table_nolock_find(&repo->table, key); #if defined(PARSEC_DEBUG_NOISIER) if( NULL == e ) { - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %s of hash table %s could not be found at %s:%d", - repo->table.key_functions.key_print(estr, 64, key, repo->table.hash_data), tablename, file, line); + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %s of hash table %s could not be found at %s:%d", + // repo->table.key_functions.key_print(estr, 64, key, repo->table.hash_data), tablename, file, line); } #endif assert( NULL != e ); r = parsec_atomic_fetch_inc_int32(&e->usagecnt) + 1; if( (e->usagelmt == r) && (0 == e->retained) ) { - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has a usage count of %u/%u and is not retained: freeing it at %s:%d", - e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, r, r, file, line); + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has a usage count of %u/%u and is not retained: freeing it at %s:%d", + // e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, r, r, file, line); parsec_hash_table_nolock_remove(&repo->table, key); parsec_hash_table_unlock_bucket(&repo->table, key); parsec_thread_mempool_free(e->data_repo_mempool_owner, e ); } else { - 
PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has %u/%u usage count and %s retained: not freeing it at %s:%d", - e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, r, e->usagelmt, e->retained ? "is" : "is not", file, line); + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "entry %p/%s of hash table %s has %u/%u usage count and %s retained: not freeing it at %s:%d", + // e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, r, e->usagelmt, e->retained ? "is" : "is not", file, line); parsec_hash_table_unlock_bucket(&repo->table, key); } } @@ -159,9 +159,9 @@ __data_repo_entry_addto_usage_limit(data_repo_t *repo, parsec_key_t key, uint32_ parsec_hash_table_unlock_bucket(&repo->table, key); parsec_thread_mempool_free(e->data_repo_mempool_owner, e ); } else { - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, - "entry %p/%s of hash table %s has a usage count of %u/%u and is %s retained at %s:%d", - e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, e->usagecnt, e->usagelmt, e->retained ? "still" : "no more", file, line); + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + // "entry %p/%s of hash table %s has a usage count of %u/%u and is %s retained at %s:%d", + // e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), tablename, e->usagecnt, e->usagelmt, e->retained ? 
"still" : "no more", file, line); parsec_hash_table_unlock_bucket(&repo->table, key); } } @@ -172,10 +172,10 @@ static void print_data_repo_entry(void *item, void *cb_data) char estr[64]; data_repo_t *repo = (data_repo_t*)cb_data; data_repo_entry_t *e = (data_repo_entry_t*)item; - PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, - "entry %p/%s of hash table %p has a usage count of %u/%u and is" - " %s retained while the repo is destroyed", - e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), repo, e->usagecnt, e->usagelmt, e->retained ? "still" : "no more"); + //PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + // "entry %p/%s of hash table %p has a usage count of %u/%u and is" + // " %s retained while the repo is destroyed", + // e, repo->table.key_functions.key_print(estr, 64, e->ht_item.key, repo->table.hash_data), repo, e->usagecnt, e->usagelmt, e->retained ? "still" : "no more"); } #endif diff --git a/parsec/parsec_reshape.c b/parsec/parsec_reshape.c index 0fd6d5779..eb4e954a1 100644 --- a/parsec/parsec_reshape.c +++ b/parsec/parsec_reshape.c @@ -38,6 +38,9 @@ void parsec_cleanup_reshape_promise(parsec_base_future_t *future) */ if(d_fut->super.status & PARSEC_DATA_FUTURE_STATUS_TRIGGERED){ PARSEC_DATA_COPY_RELEASE(future_in_data->data); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (parsec_cleanup_reshape_promise )copy %p [ref_count %d] for [original %p]", + future_in_data->data, future_in_data->data->super.super.obj_reference_count, future_in_data->data->original); } if(future_in_data->local != NULL){ free(future_in_data->local); @@ -51,6 +54,10 @@ void parsec_cleanup_reshape_promise(parsec_base_future_t *future) if(d_fut->super.tracked_data != NULL){ parsec_data_copy_t * data = (parsec_data_copy_t*) d_fut->super.tracked_data; PARSEC_DATA_COPY_RELEASE(data); + + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (parsec_cleanup_reshape_promise )copy %p [ref_count %d] for 
[original %p] ", + data, data->super.super.obj_reference_count, data->original); } } diff --git a/parsec/remote_dep_mpi.c b/parsec/remote_dep_mpi.c index 15515f2ec..2b30a3523 100644 --- a/parsec/remote_dep_mpi.c +++ b/parsec/remote_dep_mpi.c @@ -1083,7 +1083,12 @@ remote_dep_release_incoming(parsec_execution_stream_t* es, assert(i < MAX_PARAM_COUNT); if( !((1U<output[i].data.data ) /* except CONTROLs */ + { PARSEC_DATA_COPY_RELEASE(origin->output[i].data.data); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (remote_dep_release_incoming )copy %p [ref_count %d] for [original %p] ", + origin->output[i].data.data, origin->output[i].data.data->super.super.obj_reference_count, origin->output[i].data.data->original); + } } #if defined(PARSEC_DIST_COLLECTIVES) if(PARSEC_TASKPOOL_TYPE_PTG == origin->taskpool->taskpool_type) { @@ -1415,6 +1420,9 @@ static int remote_dep_nothread_memcpy(parsec_execution_stream_t* es, cmd->memcpy.source, cmd->memcpy.layout.src_displ, cmd->memcpy.layout.src_datatype, cmd->memcpy.layout.src_count); PARSEC_DATA_COPY_RELEASE(cmd->memcpy.source); + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (remote_dep_release_incoming )copy %p [ref_count %d] for [original %p] ", + cmd->memcpy.source, cmd->memcpy.source->super.super.obj_reference_count, cmd->memcpy.source->original); remote_dep_dec_flying_messages(item->cmd.memcpy.taskpool); (void)es; return rc; @@ -1667,6 +1675,9 @@ remote_dep_mpi_put_start(parsec_execution_stream_t* es, PARSEC_OBJ_RETAIN(reshape_data); PARSEC_DATA_COPY_RELEASE(old_data);/*old data has been retained for remote communication*/ + PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, + "PARSEC_DATA_COPY_RELEASE (remote_dep_release_incoming )copy %p [ref_count %d] for [original %p] ", + old_data, old_data->super.super.obj_reference_count, old_data->original); PARSEC_OBJ_RELEASE(deps->output[k].data.data_future); deps->output[k].data.data_future = NULL; From 
f1210f807c5591e0d788b9dba833a16ee0afa298 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 29 Jun 2022 16:49:10 -0400 Subject: [PATCH 109/215] RELEASE of gpu_task->original_data_in[f] moved to after __parsec_complete_execution() the iterate successor code calls the reshape code and this code works on data copies. So the loop has to be called after everything the task wants to do is complete. --- parsec/mca/device/cuda/device_cuda_module.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index e3e0170b2..6eae3db65 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2968,6 +2968,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec = NULL; goto remove_gpu_task; } + + parsec_cuda_kernel_epilog( gpu_device, gpu_task ); + __parsec_complete_execution( es, gpu_task->ec ); + gpu_device->super.executed_tasks++; int f = 0; for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) @@ -2975,10 +2979,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( gpu_task->original_data_in[f] != NULL ) PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); } - - parsec_cuda_kernel_epilog( gpu_device, gpu_task ); - __parsec_complete_execution( es, gpu_task->ec ); - gpu_device->super.executed_tasks++; parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); From 47be6cd6d2e03edd831a513564ec1a1e0b752cc5 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 6 Jul 2022 15:14:04 -0400 Subject: [PATCH 110/215] unnecessary PARSEC_OBJ_RETAIN removed --- parsec/mca/device/cuda/device_cuda_module.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c
b/parsec/mca/device/cuda/device_cuda_module.c index 6eae3db65..b5f1e8551 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1419,11 +1419,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, else { if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - { PARSEC_OBJ_RELEASE(task_data->data_in); - PARSEC_OBJ_RETAIN(candidate); - } - } task_data->data_in = candidate; in_elem = candidate; @@ -1476,11 +1472,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, else { if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - { - PARSEC_OBJ_RELEASE(task_data->data_in); - PARSEC_OBJ_RETAIN(candidate); - } + PARSEC_OBJ_RELEASE(task_data->data_in); } + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; From b55edefa9031be37b9b6ed543969c10b9124f956 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 6 Jul 2022 19:06:53 -0400 Subject: [PATCH 111/215] documentation and code cleanup --- parsec/mca/device/cuda/device_cuda_module.c | 57 +++++++++------------ 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index b5f1e8551..54e6d4fe9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1385,6 +1385,13 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; + /** + * A possible candidate is set when we call change_task_features() during migration + * preperation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater + * tha 1, it means that we have already identifies a staged_in data as the possible + * candidate. So we can directly use that data for D2D ytransfer. 
+ */ + parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); //decrement the reader corresponding to the first stage_in @@ -1404,6 +1411,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ) { + /** + * if the data was already staged_in then we would have already incremented + * the reader for it. + */ if(gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; @@ -1412,8 +1423,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best possible candidate to to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); - - //release_after_data_in_is_attached = task_data->data_in; + + /** + * If we change the data_in of a task, we should RELEASE it only after the task + * is complete. Or else it may cause problem for special cases in PTG, when calling + * the data reshape associated with iterate successor (For instance this happens when + * NEW is called for a data created solely in the GPU). To mitigate this problem we + * save the original data_in to be released in the end. 
+ */ if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; else @@ -1456,7 +1473,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - //parsec_atomic_fetch_inc_int32( &candidate->readers ); PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; @@ -1466,7 +1482,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); - //release_after_data_in_is_attached = task_data->data_in; if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; else @@ -1474,6 +1489,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) PARSEC_OBJ_RELEASE(task_data->data_in); } + + /** + * Why do we need this increment. For some reason, if this increment is not + * done the reader goes to 0, when the number of CUDA device is greater than 2. 
+ */ PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); task_data->data_in = candidate; in_elem = candidate; @@ -1618,14 +1638,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, nb_elts); } parsec_atomic_unlock( &original->lock ); - //if( NULL != release_after_data_in_is_attached ) - //{ - // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", - // release_after_data_in_is_attached, release_after_data_in_is_attached->original, - // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - // __FILE__, __LINE__); - // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - //} assert(0); return -1; } @@ -1651,20 +1663,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } gpu_elem->push_task = gpu_task->ec; /* only the task who does the transfer can modify the data status later. */ parsec_atomic_unlock( &original->lock ); - //if( NULL != release_after_data_in_is_attached ) - //{ - // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p at [readers %d, ref_count %d] %s:%d", - // release_after_data_in_is_attached, release_after_data_in_is_attached->original, - // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - // __FILE__, __LINE__); - // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - //} return 1; } if( undo_readers_inc_if_no_transfer ) { - //parsec_atomic_fetch_dec_int32( &in_elem->readers ); PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); } assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); @@ -1679,15 +1682,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, nb_elts, in_elem->version, gpu_elem->version); parsec_atomic_unlock( &original->lock ); - //if( NULL != 
release_after_data_in_is_attached ) - //{ - // PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Release copy %p attached to original %p [readers %d, ref_count %d] at %s:%d", - // release_after_data_in_is_attached, release_after_data_in_is_attached->original, - // release_after_data_in_is_attached->readers, release_after_data_in_is_attached->super.super.obj_reference_count, - // __FILE__, __LINE__); - // PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); - //} - /* TODO: data keeps the same coherence flags as before */ return 0; } @@ -2018,7 +2012,6 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, /* Nobody is at the door to handle that event on the source of that data... * we do the command directly */ parsec_atomic_lock( &task->data[i].data_in->original->lock ); - //task->data[i].data_in->readers--; PARSEC_DATA_COPY_DEC_READERS_ATOMIC(task->data[i].data_in); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tExecuting D2D transfer complete for copy %p [ref_count %d] for " @@ -2089,7 +2082,6 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, tmp, gpu_copy->readers, gpu_copy->device_index, gpu_copy->version, gpu_copy->flags, gpu_copy->coherency_state, gpu_copy->data_transfer_status); - //gpu_copy->readers--; PARSEC_DATA_COPY_DEC_READERS_ATOMIC(gpu_copy); if( 0 == gpu_copy->readers ) { parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); @@ -2446,7 +2438,6 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, } parsec_atomic_lock(&original->lock); if( flow->flow_flags & PARSEC_FLOW_ACCESS_READ ) { - //gpu_copy->readers--; PARSEC_DATA_COPY_DEC_READERS_ATOMIC(gpu_copy); if( gpu_copy->readers < 0 ) { PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, From 9967f22273e39968502bb14ac860685fa98f420c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 7 Jul 2022 14:54:47 -0400 Subject: [PATCH 112/215] add task to device mapping for iterative application A task mapped to a particular 
device. The mapping happens at two places. 1. When the task is mapped to a device for the first time (first iteration) 2. When the task is migrated to a new gpu. --- .../mca/device/cuda/device_cuda_component.c | 5 ++ parsec/mca/device/cuda/device_cuda_migrate.c | 68 +++++++++++++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 13 +++- parsec/mca/device/cuda/device_cuda_module.c | 8 +++ parsec/mca/device/device.c | 22 +++++- 5 files changed, 113 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 05c78747b..c1e4bdd22 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -41,6 +41,7 @@ int parsec_cuda_sort_pending = 0, parsec_cuda_max_streams = PARSEC_GPU_MAX_STREA int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; char* parsec_cuda_lib_path = NULL; int parsec_cuda_migrate_tasks = 0; +int parsec_cuda_iterative = 0; static int cuda_mask, cuda_nvlink_mask; @@ -201,6 +202,10 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_tasks", "Boolean to let the GPU engine migrate tasks", false, false, 0, &parsec_cuda_migrate_tasks); + (void)parsec_mca_param_reg_int_name("device_cuda", "iterative", + "Boolean to let the GPU know the workload is iterative", + false, false, 0, &parsec_cuda_iterative); + #if defined(PARSEC_PROF_TRACE) (void)parsec_mca_param_reg_int_name("device_cuda", "one_profiling_stream_per_cuda_stream", "Boolean to separate the profiling of each cuda stream into a single profiling stream", diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e94ea9f4a..ec13e7e16 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,12 +4,27 @@ extern int parsec_device_cuda_enabled; 
parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; +static parsec_hash_table_t *task_mapping_ht = NULL; double start = 0; double end = 0; PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); +static parsec_key_fn_t task_mapping_table_generic_key_fn = { + .key_equal = parsec_hash_table_generic_64bits_key_equal, + .key_hash = parsec_hash_table_generic_64bits_key_hash, + .key_print = parsec_hash_table_generic_64bits_key_print +}; + +static void task_mapping_ht_free_elt(void *_item, void *table) +{ + task_mapping_item_t *item = (task_mapping_item_t*)_item; + parsec_key_t key = item->ht_item.key; + parsec_hash_table_nolock_remove(table, key); + free(item); +} + /** * @brief The function initialises the data structures required * for inter-device migration. @@ -49,6 +64,9 @@ int parsec_cuda_migrate_init(int ndevices) nvml_ret = nvmlInit_v2(); #endif + task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); + parsec_hash_table_init(task_mapping_ht, offsetof(task_mapping_item_t, ht_item), 16, task_mapping_table_generic_key_fn, NULL); + char hostname[256]; gethostname(hostname, sizeof(hostname)); printf("PID %d on %s ready for attach\n", getpid(), hostname); @@ -71,6 +89,11 @@ int parsec_cuda_migrate_fini() nvmlShutdown(); #endif + parsec_hash_table_for_all(task_mapping_ht, task_mapping_ht_free_elt, task_mapping_ht); + parsec_hash_table_fini(task_mapping_ht); + PARSEC_OBJ_RELEASE(task_mapping_ht); + task_mapping_ht = NULL; + for(i = 0; i < NDEVICES; i++) { printf("\n*********** DEVICE %d *********** \n", i); @@ -554,6 +577,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_lock( &original->lock ); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + /** + * we set a possible candidate for this flow of the task. 
This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. + */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); @@ -602,5 +630,45 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo return 0; } +int update_task_to_device_mapping(parsec_task_t *task, int device_index) +{ + parsec_key_t key; + task_mapping_item_t *item; + + key = task->task_class->make_key(task->taskpool, task->locals); + if( NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key)) ) + { + + item = (task_mapping_item_t *)malloc(sizeof(task_mapping_item_t)); + item->device_index = device_index; + item->ht_item.key = key; + parsec_hash_table_lock_bucket(task_mapping_ht, key); + parsec_hash_table_nolock_insert(task_mapping_ht, &item->ht_item); + parsec_hash_table_unlock_bucket(task_mapping_ht, key); + } + else + item->ht_item.key = key; +} + +int find_task_to_device_mapping(parsec_task_t *task) +{ + parsec_key_t key; + task_mapping_item_t *item; + + key = task->task_class->make_key(task->taskpool, task->locals); + if( NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key)) ) + return -1; + + return item->device_index; +} + + + + + + + + + diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index f5610f1df..ea9d71eec 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -41,13 +41,20 @@ typedef struct parsec_device_cuda_info_s typedef struct migrated_task_s { - parsec_list_item_t list_item; - parsec_gpu_task_t* gpu_task; + parsec_list_item_t list_item; + parsec_gpu_task_t* gpu_task; parsec_device_gpu_module_t* dealer_device; parsec_device_gpu_module_t* starving_device; int stage_in_status; } migrated_task_t; +typedef struct task_mapping_item_s +{ + 
parsec_hash_table_item_t ht_item; + int device_index; +} task_mapping_item_t; + + int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); int parsec_cuda_get_device_load(int device); @@ -74,6 +81,8 @@ int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data parsec_data_copy_t* src_copy, parsec_data_copy_t* dst_copy, uint8_t stage_in_device, uint8_t access_mode); double current_time(); +int update_task_to_device_mapping(parsec_task_t *task, int device_index); +int find_task_to_device_mapping(parsec_task_t *task); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 54e6d4fe9..4c8ca79ba 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -49,6 +49,7 @@ static int parsec_cuda_memory_release( parsec_device_cuda_module_t* gpu_device ) static int parsec_cuda_flush_lru( parsec_device_module_t *device ); extern int parsec_cuda_migrate_tasks; +extern int parsec_cuda_iterative; /* look up how many FMA per cycle in single/double, per cuda MP * precision. @@ -2919,6 +2920,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(gpu_device->pending) ); if( NULL != gpu_task ) { + + /** + * if the task has been migrated, we have to update the mapping. 
+ */ + if(parsec_cuda_iterative && (gpu_task->migrate_status > TASK_NOT_MIGRATED)) + update_task_to_device_mapping(gpu_task->ec, gpu_device->super.device_index); + pop_null = 0; gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; /* force at least one tour */ PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tGet from shared queue %s priority %d", gpu_device->super.name, diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 65c33e5ff..7db14d91a 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -14,6 +14,7 @@ #include "parsec/execution_stream.h" #include "parsec/utils/argv.h" #include "parsec/parsec_internal.h" +#include "parsec/mca/device/cuda/device_cuda_migrate.h" #include #if defined(PARSEC_HAVE_ERRNO_H) @@ -45,6 +46,8 @@ static parsec_device_module_t **modules_activated = NULL; static mca_base_component_t **device_components = NULL; +extern int parsec_cuda_iterative; + /** * Temporary solution: Use the following two arrays to taskpool the weight and * the load on different devices. These arrays are not available before the @@ -73,6 +76,15 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) int i, dev_index = -1, data_index; parsec_taskpool_t* tp = this_task->taskpool; + if(parsec_cuda_iterative) + { + // if task to device mapping is already available use that + dev_index = find_task_to_device_mapping(this_task); + if(dev_index != -1) + return dev_index; + } + + /* Select the location of the first data that is used in READ/WRITE or pick the * location of one of the READ data. For now use the last one. */ @@ -168,7 +180,15 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } - //dev_index = 2; //CHANGE THIS, only for testing all task mapped to first GPU device + + /** + * update task to device mapping. 
The code control reaches here only if there was no + * previous mapping available + */ + + if(parsec_cuda_iterative) + update_task_to_device_mapping(this_task, dev_index); + return dev_index; } From 4261d0614000d44ad55a2e5cf1d87118688e6de7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 7 Jul 2022 22:16:08 -0400 Subject: [PATCH 113/215] new accounting information added --- parsec/mca/device/cuda/device_cuda_migrate.c | 15 +++++++++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 3 +++ parsec/mca/device/device.c | 4 ++-- parsec/parsec.c | 1 + parsec/scheduling.c | 1 + 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ec13e7e16..80a68788e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -5,6 +5,9 @@ parsec_device_cuda_info_t* device_info; static parsec_list_t* migrated_task_list; static int NDEVICES; static parsec_hash_table_t *task_mapping_ht = NULL; +static int task_migrated_per_tp = 0; +static int tp_count; + double start = 0; double end = 0; @@ -428,6 +431,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu device_info[dealer_device_index].level2++; } nb_migrated++; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); /** * @brief change migrate_status according to the status of the stage in of the @@ -662,6 +666,17 @@ int find_task_to_device_mapping(parsec_task_t *task) return item->device_index; } +void clear_task_migrated_per_tp() +{ + task_migrated_per_tp = 0; +} + +void print_task_migrated_per_tp() +{ + printf("\n*********** TASKPOOL %d *********** \n", tp_count++); + printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); +} + diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index ea9d71eec..118a157a2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -83,6 +83,9 @@ int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data double current_time(); int update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); +void clear_task_migrated_per_tp(); +void print_task_migrated_per_tp(); + diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 7db14d91a..297cfa094 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -186,8 +186,8 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) * previous mapping available */ - if(parsec_cuda_iterative) - update_task_to_device_mapping(this_task, dev_index); + //if(parsec_cuda_iterative) + // update_task_to_device_mapping(this_task, dev_index); return dev_index; } diff --git a/parsec/parsec.c b/parsec/parsec.c index 9577f51b4..8c7adc1ae 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -2175,6 +2175,7 @@ void parsec_taskpool_unregister( parsec_taskpool_t* tp ) void parsec_taskpool_free(parsec_taskpool_t *tp) { + print_task_migrated_per_tp(); assert(NULL != tp); PARSEC_OBJ_RELEASE(tp); } diff --git a/parsec/scheduling.c b/parsec/scheduling.c index d4a904f1f..b342c2945 100644 --- a/parsec/scheduling.c +++ b/parsec/scheduling.c @@ -624,6 +624,7 @@ int parsec_context_add_taskpool( parsec_context_t* context, parsec_taskpool_t* t if( NULL == parsec_current_scheduler) { parsec_set_scheduler( context ); } + clear_task_migrated_per_tp(); tp->context = context; /* save the context */ From d14b4f1410d10dad9a5738e8b7fff5e374042fd0 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 8 Jul 2022 17:20:38 -0400 Subject: [PATCH 114/215] starvation condition updated. 
Device is starving if the task count is less than twice the number of execution streams --- parsec/mca/device/cuda/device_cuda_migrate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 80a68788e..eb3fd7c7f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -231,14 +231,13 @@ int parsec_cuda_tasks_executed(int device) */ int is_starving(int device) { - return ( parsec_cuda_get_device_task(device, -1) < 1 ) ? 1 : 0; - + return ( parsec_cuda_get_device_task(device, -1) < 5 ) ? 1 : 0; } int will_starve(int device) { - return ( parsec_cuda_get_device_task(device, -1) < 3 ) ? 1 : 0; + return ( (parsec_cuda_get_device_task(device, -1) - 1) < 5 ) ? 1 : 0; } /** @@ -677,6 +676,11 @@ void print_task_migrated_per_tp() printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); } +int get_tp_count() +{ + return tp_count; +} + From afd61728ecd1821506d93a9d5d6b6bf8d9fe392c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 10 Jul 2022 12:07:32 -0400 Subject: [PATCH 115/215] minor code changes --- parsec/data.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 1 + parsec/mca/device/cuda/device_cuda_module.c | 6 ++++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/parsec/data.c b/parsec/data.c index 2a3858101..52a1ff8e7 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -235,7 +235,7 @@ int parsec_data_copy_detach(parsec_data_t* data, PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "DEV[%d]: parsec_data_copy_detach failed to identify new owner (younger version exists in device %d): original %p device_copy %p", device, younger_version, data, copy); - assert(0); + //assert(0); } if( new_owner_copy != NULL ) PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 118a157a2..1618357d0
100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -85,6 +85,7 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); void print_task_migrated_per_tp(); +int get_tp_count(); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 4c8ca79ba..af355e9f5 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1495,7 +1495,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * Why do we need this increment. For some reason, if this increment is not * done the reader goes to 0, when the number of CUDA device is greater than 2. */ - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + if(parsec_cuda_migrate_tasks) + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -2892,7 +2893,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, fetch_task_from_shared_queue: - //printf(" time %lf device %d level0 %d level1 %d level2 %d total %d \n", + //printf(" tp %d time %lf device %d level0 %d level1 %d level2 %d total %d \n", + // get_tp_count(), // current_time(), // CUDA_DEVICE_NUM(gpu_device->super.device_index), // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), From a7b69ef71057e3ad9aa6e25227263b8e06cb0320 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 11 Jul 2022 12:41:08 -0400 Subject: [PATCH 116/215] parsec_gpu_data_stage_in() code trimmed --- parsec/mca/device/cuda/device_cuda_module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index af355e9f5..39f8ca761 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ 
b/parsec/mca/device/cuda/device_cuda_module.c @@ -1379,6 +1379,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* If gpu_elem is not invalid, then it is already there and the right version, * and we're not going to transfer from another source, skip the selection */ + #if 0 if( gpu_elem->coherency_state != PARSEC_DATA_COHERENCY_INVALID ) { if( (gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) @@ -1401,12 +1402,19 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, goto src_selected; } } + #endif // if the task is a migrated task and the possible candidate has already been identified if( (gpu_task->migrate_status > TASK_NOT_MIGRATED) && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) { int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; + /** + * A possible candidate is set when we call change_task_features() during migration + * preperation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater + * tha 1, it means that we have already identifies a staged_in data as the possible + * candidate. So we can directly use that data for D2D ytransfer. 
+ */ parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); From c2e5b726dfe1560b6a5516485290b708e479e5fe Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 11 Jul 2022 16:27:53 -0400 Subject: [PATCH 117/215] reader incremented when task is found immediately from task_data->data_in --- parsec/mca/device/cuda/device_cuda_module.c | 45 +++++---------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 39f8ca761..534c241ff 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1373,36 +1373,16 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( PARSEC_DEV_CUDA == in_elem_dev->super.super.type ) { if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { /* We can directly do D2D, so let's skip the selection */ - goto src_selected; - } - } - - /* If gpu_elem is not invalid, then it is already there and the right version, - * and we're not going to transfer from another source, skip the selection */ - #if 0 - if( gpu_elem->coherency_state != PARSEC_DATA_COHERENCY_INVALID ) - { - if( (gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) - { - - int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; - /** - * A possible candidate is set when we call change_task_features() during migration - * preperation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater - * tha 1, it means that we have already identifies a staged_in data as the possible - * candidate. So we can directly use that data for D2D ytransfer.
- */ + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", + gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); + + PARSEC_DATA_COPY_INC_READERS_ATOMIC(in_elem); + undo_readers_inc_if_no_transfer = 1; - parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; - parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); - //decrement the reader corresponding to the first stage_in - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(candidate); - goto src_selected; } } - #endif // if the task is a migrated task and the possible candidate has already been identified if( (gpu_task->migrate_status > TASK_NOT_MIGRATED) @@ -1430,7 +1410,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_OBJ_RETAIN(candidate); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best possible candidate to to Device to Device copy", + "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 2) to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); /** @@ -1480,16 +1460,13 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* candidate is the best candidate to do D2D. Let's register as a reader for this * data copy, and we can unlock and schedule the D2D. 
*/ PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy, increasing its readers to %d", + "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate (case 3) to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; /* We swap data_in with candidate, so we update the reference counters */ PARSEC_OBJ_RETAIN(candidate); - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate to to Device to Device copy", - gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; @@ -1503,8 +1480,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * Why do we need this increment. For some reason, if this increment is not * done the reader goes to 0, when the number of CUDA device is greater than 2. */ - if(parsec_cuda_migrate_tasks) - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + //if(parsec_cuda_migrate_tasks) + // PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; From f4bf594b5abc4ba2eb0fb2778071d87ef588d110 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 12 Jul 2022 11:07:57 -0400 Subject: [PATCH 118/215] responsibility of task completion moved from GPU manager thread to the CPU threads. A special case is added to __parsec_task_progress() to deal with task completion of the GPU tasks. 
As the gpu_task->ec->status is updated, the special case will make sure that the tasks will not be executed again. --- parsec/mca/device/cuda/device_cuda_module.c | 20 ++++++++++++++++---- parsec/scheduling.c | 8 ++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 534c241ff..d829371ce 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2948,10 +2948,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec = NULL; goto remove_gpu_task; } - - parsec_cuda_kernel_epilog( gpu_device, gpu_task ); - __parsec_complete_execution( es, gpu_task->ec ); - gpu_device->super.executed_tasks++; int f = 0; for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) @@ -2960,6 +2956,22 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); } + parsec_cuda_kernel_epilog( gpu_device, gpu_task ); + gpu_device->super.executed_tasks++; + + /** + * responsibility of task completion moved from GPU manager thread + * to the CPU threads. A special case is added to __parsec_task_progress() + * to deal with task completion of the GPU tasks. As the gpu_task->ec->status + * is updated, the spaecial case will make sure that the the tasks will not be + * executed again.
+ */ + + gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE; + int distance = 0; + PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec); + __parsec_schedule(es, gpu_task->ec, distance); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); diff --git a/parsec/scheduling.c b/parsec/scheduling.c index b342c2945..57b4da22a 100644 --- a/parsec/scheduling.c +++ b/parsec/scheduling.c @@ -431,6 +431,14 @@ int __parsec_task_progress( parsec_execution_stream_t* es, rc = task->task_class->prepare_input(es, task); PARSEC_PINS(es, PREPARE_INPUT_END, task); } + + //A special case is added to deal with task completion of the GPU tasks + if( task->status == PARSEC_TASK_STATUS_COMPLETE) + { + __parsec_complete_execution( es, task ); + return PARSEC_HOOK_RETURN_DONE; + } + switch(rc) { case PARSEC_HOOK_RETURN_DONE: { if(task->status <= PARSEC_TASK_STATUS_HOOK) { From 1f94d5c5c6ae8074b0e88b503dcc96aaee72f407 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 12 Jul 2022 15:18:26 -0400 Subject: [PATCH 119/215] profiling details updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 16 ++++++++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 16 +++++++++----- parsec/mca/device/cuda/device_cuda_module.c | 23 ++++++++++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index eb3fd7c7f..c8dc35b2e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -20,6 +20,9 @@ static parsec_key_fn_t task_mapping_table_generic_key_fn = { .key_print = parsec_hash_table_generic_64bits_key_print }; +int parsec_gpu_task_count_start; +int parsec_gpu_task_count_end; + static void task_mapping_ht_free_elt(void *_item, void *table) { task_mapping_item_t *item = (task_mapping_item_t*)_item; @@ -28,6 +31,16 @@ 
static void task_mapping_ht_free_elt(void *_item, void *table) free(item); } + +static inline void gpu_dev_profiling_init(void) +{ + static const char *gpu_dev_prof_info_str = "task_count{int32_t}"; + parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", + sizeof(int32_t), + gpu_dev_prof_info_str, + &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); +} + /** * @brief The function initialises the data structures required * for inter-device migration. @@ -69,6 +82,9 @@ int parsec_cuda_migrate_init(int ndevices) task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); parsec_hash_table_init(task_mapping_ht, offsetof(task_mapping_item_t, ht_item), 16, task_mapping_table_generic_key_fn, NULL); + #if defined(PARSEC_PROF_TRACE) + gpu_dev_profiling_init(); + #endif char hostname[256]; gethostname(hostname, sizeof(hostname)); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 1618357d0..ba34ff3bc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -54,6 +54,16 @@ typedef struct task_mapping_item_s int device_index; } task_mapping_item_t; +//typedef struct gpu_dev_prof_s +//{ +// int32_t device_index; +// int32_t task_count; +//} gpu_dev_prof_t; +//static const char *gpu_dev_prof_info_str = "int32_t{device_index};int32_t{task_count}"; + +//static int parsec_gpu_task_count_start; +//static int parsec_gpu_task_count_end; + int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); @@ -86,11 +96,7 @@ int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); void print_task_migrated_per_tp(); int get_tp_count(); - - - - - +static inline void gpu_dev_profiling_init(void); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d829371ce..27a2bb613 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ 
b/parsec/mca/device/cuda/device_cuda_module.c @@ -50,6 +50,8 @@ static int parsec_cuda_flush_lru( parsec_device_module_t *device ); extern int parsec_cuda_migrate_tasks; extern int parsec_cuda_iterative; +extern int parsec_gpu_task_count_start; +extern int parsec_gpu_task_count_end; /* look up how many FMA per cycle in single/double, per cuda MP * precision. @@ -2769,6 +2771,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); + + #if defined(PARSEC_PROF_TRACE) + parsec_profiling_trace_flags(es->es_profile, + parsec_gpu_task_count_start, + (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), + gpu_device->super.device_index, &gpu_device->mutex, 0); + #endif return PARSEC_HOOK_RETURN_ASYNC; } PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Entering GPU management at %s:%d", @@ -2780,6 +2789,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, (unsigned long)es, PROFILE_OBJECT_ID_NULL, NULL ); #endif /* defined(PARSEC_PROF_TRACE) */ +#if defined(PARSEC_PROF_TRACE) + parsec_profiling_trace_flags(es->es_profile, + parsec_gpu_task_count_start, + (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), + gpu_device->super.device_index, &gpu_device->mutex, 0); +#endif + status = cudaSetDevice( cuda_device->cuda_index ); PARSEC_CUDA_CHECK_ERROR( "(parsec_cuda_kernel_scheduler) cudaSetDevice ", status, {return PARSEC_HOOK_RETURN_DISABLE;} ); @@ -2959,6 +2975,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_cuda_kernel_epilog( gpu_device, gpu_task ); gpu_device->super.executed_tasks++; + #if defined(PARSEC_PROF_TRACE) + parsec_profiling_trace_flags(es->es_profile, + parsec_gpu_task_count_end, + 
(uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), + gpu_device->super.device_index, &gpu_device->mutex, 0); + #endif + /** * responsibility of task completion moved from GPU manager thread * to the CPU threads. A special case is added to __parsec_task_progress() From f6f7ac9d8a69583da422941bdc964567d45a4c70 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 14 Jul 2022 09:27:53 -0400 Subject: [PATCH 120/215] Profiling code updated. --- parsec/mca/device/cuda/device_cuda_migrate.c | 492 +++++++++---------- parsec/mca/device/cuda/device_cuda_migrate.h | 74 ++- parsec/mca/device/cuda/device_cuda_module.c | 27 +- 3 files changed, 269 insertions(+), 324 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c8dc35b2e..01c9427df 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,71 +1,69 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" extern int parsec_device_cuda_enabled; -parsec_device_cuda_info_t* device_info; -static parsec_list_t* migrated_task_list; +parsec_device_cuda_info_t *device_info; +static parsec_list_t *migrated_task_list; static int NDEVICES; static parsec_hash_table_t *task_mapping_ht = NULL; static int task_migrated_per_tp = 0; static int tp_count; - double start = 0; double end = 0; PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); static parsec_key_fn_t task_mapping_table_generic_key_fn = { - .key_equal = parsec_hash_table_generic_64bits_key_equal, - .key_hash = parsec_hash_table_generic_64bits_key_hash, - .key_print = parsec_hash_table_generic_64bits_key_print -}; + .key_equal = parsec_hash_table_generic_64bits_key_equal, + .key_hash = parsec_hash_table_generic_64bits_key_hash, + .key_print = parsec_hash_table_generic_64bits_key_print}; int parsec_gpu_task_count_start; int 
parsec_gpu_task_count_end; static void task_mapping_ht_free_elt(void *_item, void *table) { - task_mapping_item_t *item = (task_mapping_item_t*)_item; + task_mapping_item_t *item = (task_mapping_item_t *)_item; parsec_key_t key = item->ht_item.key; parsec_hash_table_nolock_remove(table, key); free(item); } - -static inline void gpu_dev_profiling_init(void) -{ - static const char *gpu_dev_prof_info_str = "task_count{int32_t}"; +static void gpu_dev_profiling_init() +{ + //const char *gpu_dev_prof_info_str = "exec_time{double};device_index{int32_t};task_count{int32_t}"; parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", - sizeof(int32_t), - gpu_dev_prof_info_str, - &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); + sizeof(gpu_dev_prof_t), + "device_index{int32_t};task_count{int32_t}", + &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } /** * @brief The function initialises the data structures required * for inter-device migration. - * + * * @param ndevices number of devices - * @return int + * @return int */ int parsec_cuda_migrate_init(int ndevices) { int i, j; - - #if defined(PARSEC_HAVE_CUDA) + +#if defined(PARSEC_HAVE_CUDA) nvmlReturn_t nvml_ret; - #endif +#endif start = MPI_Wtime(); NDEVICES = ndevices; - device_info = (parsec_device_cuda_info_t *) calloc(ndevices, sizeof(parsec_device_cuda_info_t)); - migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t);; + device_info = (parsec_device_cuda_info_t *)calloc(ndevices, sizeof(parsec_device_cuda_info_t)); + migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t); + ; - for(i = 0; i < NDEVICES; i++) + for (i = 0; i < NDEVICES; i++) { - for(j = 0; j < EXECUTION_LEVEL; j++) + for (j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; device_info[i].load = 0; @@ -76,26 +74,24 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].received = 0; } - #if defined(PARSEC_HAVE_CUDA) +#if defined(PARSEC_HAVE_CUDA) nvml_ret = nvmlInit_v2(); - #endif +#endif task_mapping_ht 
= PARSEC_OBJ_NEW(parsec_hash_table_t); parsec_hash_table_init(task_mapping_ht, offsetof(task_mapping_item_t, ht_item), 16, task_mapping_table_generic_key_fn, NULL); - #if defined(PARSEC_PROF_TRACE) - gpu_dev_profiling_init(); - #endif +#if defined(PARSEC_PROF_TRACE) + gpu_dev_profiling_init(); +#endif char hostname[256]; gethostname(hostname, sizeof(hostname)); printf("PID %d on %s ready for attach\n", getpid(), hostname); - //sleep(60); + // sleep(60); printf("Migration module initialised for %d devices \n", NDEVICES); - return 0; - } int parsec_cuda_migrate_fini() @@ -103,84 +99,79 @@ int parsec_cuda_migrate_fini() int i; end = MPI_Wtime(); - - #if defined(PARSEC_HAVE_CUDA) + +#if defined(PARSEC_HAVE_CUDA) nvmlShutdown(); - #endif +#endif parsec_hash_table_for_all(task_mapping_ht, task_mapping_ht_free_elt, task_mapping_ht); parsec_hash_table_fini(task_mapping_ht); PARSEC_OBJ_RELEASE(task_mapping_ht); task_mapping_ht = NULL; - for(i = 0; i < NDEVICES; i++) + for (i = 0; i < NDEVICES; i++) { printf("\n*********** DEVICE %d *********** \n", i); printf("Total tasks executed: %d \n", device_info[i].total_tasks_executed); printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", - device_info[i].level0, device_info[i].level1, device_info[i].level2, - device_info[i].level0 + device_info[i].level1 + device_info[i].level2); - printf("Task check : level0 %d level1 %d level2 %d total %d \n", - parsec_cuda_get_device_task(i, 0), - parsec_cuda_get_device_task(i, 1), - parsec_cuda_get_device_task(i, 2), - parsec_cuda_get_device_task(i, -1)); + device_info[i].level0, device_info[i].level1, device_info[i].level2, + device_info[i].level0 + device_info[i].level1 + device_info[i].level2); + printf("Task check : level0 %d level1 %d level2 %d total %d \n", + parsec_cuda_get_device_task(i, 0), + parsec_cuda_get_device_task(i, 1), + parsec_cuda_get_device_task(i, 2), + parsec_cuda_get_device_task(i, -1)); printf("Task received : %d \n", device_info[i].received); - } - 
printf("\n---------Execution time = %lf ------------ \n", end - start); - PARSEC_OBJ_RELEASE(migrated_task_list); - free(device_info); + printf("\n---------Execution time = %lf ------------ \n", end - start); + PARSEC_OBJ_RELEASE(migrated_task_list); + free(device_info); printf("Migration module shut down \n"); return 0; - } - double current_time() { - return ( MPI_Wtime() - start); + return (MPI_Wtime() - start); } /** * @brief returns the load of a particular device - * + * * nvml_utilization has two fields - gpu and memory * gpu - Percent of time over the past sample period during which one or more kernels was executing on the GPU. * memory - Percent of time over the past sample period during which global (device) memory was being read or written - * + * * @param device index of the device - * @return int + * @return int */ int parsec_cuda_get_device_load(int device) { - #if defined(PARSEC_HAVE_CUDA) +#if defined(PARSEC_HAVE_CUDA) nvmlDevice_t nvml_device; nvmlUtilization_t nvml_utilization; nvmlReturn_t nvml_ret; - + nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); - nvml_ret = nvmlDeviceGetUtilizationRates ( nvml_device, &nvml_utilization); + nvml_ret = nvmlDeviceGetUtilizationRates(nvml_device, &nvml_utilization); device_info[device].load = nvml_utilization.gpu; - //printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); - #else - device_info[device].load = device_info[device].task_count; - #endif /* PARSEC_HAVE_CUDA */ +// printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); +#else + device_info[device].load = device_info[device].task_count; +#endif /* PARSEC_HAVE_CUDA */ return device_info[device].load; - } - /** * @brief sets the load of a particular device - * + * * @param device index of the device - * @return int + * @return int */ int parsec_cuda_set_device_load(int device, int load) @@ -189,32 +180,30 @@ int parsec_cuda_set_device_load(int device, int load) return rc 
+ load; } - /** * @brief returns the number of tasks in a particular device - * + * * @param device index of the device * @param level level of execution - * @return int + * @return int */ int parsec_cuda_get_device_task(int device, int level) { - if( level == -1) + if (level == -1) return (device_info[device].task_count[0] + device_info[device].task_count[1] + device_info[device].task_count[2]); - + return device_info[device].task_count[level]; } - /** * @brief sets the number of tasks in a particular device - * + * * @param device index of the device * @param level level of execution - * @return int + * @return int */ int parsec_cuda_set_device_task(int device, int task_count, int level) @@ -223,12 +212,11 @@ int parsec_cuda_set_device_task(int device, int task_count, int level) return rc + task_count; } - /** * @brief sets the load of a particular device - * + * * @param device index of the device - * @return int + * @return int */ int parsec_cuda_tasks_executed(int device) @@ -239,91 +227,87 @@ int parsec_cuda_tasks_executed(int device) /** * @brief returns 1 if the device is starving, 0 if its is not - * + * * @param device index number of the device - * @return int + * @return int * * TODO: needs updation */ int is_starving(int device) { - return ( parsec_cuda_get_device_task(device, -1) < 5 ) ? 1 : 0; + return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; } - int will_starve(int device) { - return ( (parsec_cuda_get_device_task(device, -1) - 1) < 5 ) ? 1 : 0; + return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; } /** * @brief returns the index of a starving device and returns -1 * if no device is starving. 
- * + * * @param dealer_device device probing for a starving device * @param ndevice total number of devices - * @return int - * + * @return int + * * TODO: needs updation */ int find_starving_device(int dealer_device) { int i; - for(i = 0; i < NDEVICES; i++) + for (i = 0; i < NDEVICES; i++) { - if( i == dealer_device ) + if (i == dealer_device) continue; - if(is_starving(i)) + if (is_starving(i)) return i; } - return -1; + return -1; } - - /** - * @brief This function will be called in __parsec_context_wait() just before - * parsec_current_scheduler->module.select(). This will ensure that the migrated tasks - * will get priority over new tasks. - * - * When a compute thread calls this function, it is forced to try to be a manager of the - * a device. If the device already has a manager, the compute thread passes the control of - * the task to the manager. If not the compute thread will become the manager. - * - * @param es - * @return int + * @brief This function will be called in __parsec_context_wait() just before + * parsec_current_scheduler->module.select(). This will ensure that the migrated tasks + * will get priority over new tasks. + * + * When a compute thread calls this function, it is forced to try to be a manager of the + * a device. If the device already has a manager, the compute thread passes the control of + * the task to the manager. If not the compute thread will become the manager. 
+ * + * @param es + * @return int */ -int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es) +int parsec_cuda_mig_task_dequeue(parsec_execution_stream_t *es) { char tmp[128]; migrated_task_t *mig_task = NULL; parsec_gpu_task_t *migrated_gpu_task = NULL; - parsec_device_gpu_module_t* dealer_device = NULL; - parsec_device_gpu_module_t* starving_device = NULL; + parsec_device_gpu_module_t *dealer_device = NULL; + parsec_device_gpu_module_t *starving_device = NULL; int stage_in_status = 0; + mig_task = (migrated_task_t *)parsec_list_try_pop_front(migrated_task_list); - mig_task = (migrated_task_t*) parsec_list_try_pop_front(migrated_task_list); - - if(mig_task != NULL) - { - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); + if (mig_task != NULL) + { + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); migrated_gpu_task = mig_task->gpu_task; - assert( migrated_gpu_task->migrate_status != TASK_NOT_MIGRATED ); + assert(migrated_gpu_task->migrate_status != TASK_NOT_MIGRATED); dealer_device = mig_task->dealer_device; starving_device = mig_task->starving_device; stage_in_status = mig_task->stage_in_status; change_task_features(migrated_gpu_task, dealer_device, starving_device, stage_in_status); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); - parsec_atomic_fetch_inc_int32(&device_info[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); - parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *) migrated_gpu_task, starving_device->super.device_index); - PARSEC_OBJ_DESTRUCT(mig_task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + parsec_atomic_fetch_inc_int32(&device_info[CUDA_DEVICE_NUM(starving_device->super.device_index)].received); + parsec_cuda_kernel_scheduler(es, (parsec_gpu_task_t *)migrated_gpu_task, starving_device->super.device_index); + PARSEC_OBJ_DESTRUCT(mig_task); free(mig_task); return 1; @@ -332,23 +316,20 @@ int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t 
*es) return 0; } - - /** * This function enqueues the migrated task to a node level queue. * * Returns: negative number if any error occured. * positive: starving device index. */ -int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task) +int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t *mig_task) { - parsec_list_push_back((parsec_list_t*)migrated_task_list, (parsec_list_item_t*)mig_task); - + parsec_list_push_back((parsec_list_t *)migrated_task_list, (parsec_list_item_t *)mig_task); + parsec_gpu_task_t *migrated_gpu_task = mig_task->gpu_task; - parsec_device_gpu_module_t* starving_device = mig_task->starving_device; + parsec_device_gpu_module_t *starving_device = mig_task->starving_device; char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, - ((parsec_gpu_task_t *) migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Enqueue task %s to device queue %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), CUDA_DEVICE_NUM(starving_device->super.device_index)); (void)es; return 0; @@ -357,46 +338,46 @@ int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t /** * @brief check if there are any devices starving. If there are any starving device migrate * half the task from the dealer device to the starving device. 
- * - * @param es - * @param dealer_gpu_device - * @return int + * + * @param es + * @param dealer_gpu_device + * @return int */ -int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device) +int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device) { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0, k = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; - parsec_device_gpu_module_t* starving_device = NULL; + parsec_device_gpu_module_t *starving_device = NULL; migrated_task_t *mig_task = NULL; - dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if( will_starve(dealer_device_index) ) + dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); + if (will_starve(dealer_device_index)) return 0; - + starving_device_index = find_starving_device(dealer_device_index); - if(starving_device_index == -1) + if (starving_device_index == -1) return 0; - starving_device = (parsec_device_gpu_module_t*)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); + starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); /** * @brief Tasks are searched in different levels one by one. At this point we assume * that the cost of migration increases, as the level increase. 
*/ - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( &(dealer_device->pending) ); //level 0 + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 execution_level = 0; - if(migrated_gpu_task == NULL) + if (migrated_gpu_task == NULL) { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[0]->fifo_pending ); //level 1 + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 execution_level = 1; - if( migrated_gpu_task == NULL) + if (migrated_gpu_task == NULL) { - for(j = 0; j < (dealer_device->max_exec_streams - 2); j++) + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) { - migrated_gpu_task = (parsec_gpu_task_t*)parsec_list_try_pop_back( dealer_device->exec_stream[ (2 + j) ]->fifo_pending ); //level2 - if(migrated_gpu_task != NULL) + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + if (migrated_gpu_task != NULL) { execution_level = 2; stream_index = 2 + j; @@ -405,44 +386,43 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu } } } - - if(migrated_gpu_task != NULL) + if (migrated_gpu_task != NULL) { - assert(migrated_gpu_task->ec != NULL); - //parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)migrated_gpu_task); + assert(migrated_gpu_task->ec != NULL); + // parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); /** * @brief if the task is a not a computational kerenel or if it is a task that has * already been migrated, we stop the migration. 
*/ - if(migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { - if(execution_level == 0) - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t*) migrated_gpu_task ); - if(execution_level == 1) - parsec_list_push_back( dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); - if(execution_level == 2) - parsec_list_push_back( dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t*) migrated_gpu_task ); - + if (execution_level == 0) + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); + if (execution_level == 1) + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + if (execution_level == 2) + parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + return nb_migrated; } - assert( (migrated_gpu_task != NULL) && (migrated_gpu_task->ec != NULL) ); + assert((migrated_gpu_task != NULL) && (migrated_gpu_task->ec != NULL)); - if(execution_level == 0) + if (execution_level == 0) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); device_info[dealer_device_index].level0++; } - if(execution_level == 1) + if (execution_level == 1) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); device_info[dealer_device_index].level1++; } - if(execution_level == 2) + if (execution_level == 2) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* 
level */ 2); device_info[dealer_device_index].level2++; } nb_migrated++; @@ -452,36 +432,37 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu * @brief change migrate_status according to the status of the stage in of the * stage_in data. */ - if( execution_level == 2 ) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; else migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; /** * @brief An object of type migrated_task_t is created store the migrated task * and other associated details. This object is enqueued to a node level queue. - * The main objective of this was to make sure that the manager does not have to sepend + * The main objective of this was to make sure that the manager does not have to sepend * time on migration. It can select the task for migration, enqqueue it to the node level - * queue and then return to its normal working. + * queue and then return to its normal working. */ - mig_task = (migrated_task_t *) calloc(1, sizeof(migrated_task_t)); - PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; - //memset(migrated_gpu_task->posssible_candidate, -1, sizeof(int32_t)); - for( k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->posssible_candidate[k] = -1; + // memset(migrated_gpu_task->posssible_candidate, -1, sizeof(int32_t)); + for (k = 0; k < MAX_PARAM_COUNT; k++) + migrated_gpu_task->posssible_candidate[k] = -1; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)mig_task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); - parsec_cuda_mig_task_enqueue(es, mig_task); + parsec_cuda_mig_task_enqueue(es, mig_task); char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *) migrated_gpu_task)->ec), - execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); } - + migrated_gpu_task = NULL; ///* update the expected load on the GPU device */ parsec_device_load[dealer_device->super.device_index] -= nb_migrated * parsec_device_sweight[dealer_device->super.device_index]; @@ -491,128 +472,122 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu /** * @brief This function changes the features of a task, in a way that it is preped * for migration. 
- * - * @param gpu_task - * @param dealer_device - * @param stage_in_status - * @return int + * + * @param gpu_task + * @param dealer_device + * @param stage_in_status + * @return int */ -int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, - parsec_device_gpu_module_t* starving_device, int stage_in_status) +int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, int stage_in_status) { int i = 0; parsec_task_t *task = gpu_task->ec; parsec_data_copy_t *src_copy = NULL; char tmp[128]; - for(i = 0; i < task->task_class->nb_flows; i++) + for (i = 0; i < task->task_class->nb_flows; i++) { if (task->data[i].data_out == NULL) continue; - if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) // CTL flow continue; /** * Data is already staged in the dealer device and we can find all the data * of the tasks to be migrated in the dealer device. 
*/ - if( stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN ) - { - parsec_data_t* original = task->data[i].data_out->original; - parsec_atomic_lock( &original->lock ); + if (stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN) + { + parsec_data_t *original = task->data[i].data_out->original; + parsec_atomic_lock(&original->lock); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert( task->data[i].data_out->readers > 0 ); - - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + assert(task->data[i].data_out->readers > 0); + + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - - if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); - else - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + else + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } - if( (PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); 
- - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - - if( original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version ) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); - else - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t*)task->data[i].data_out); + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + else + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } - if( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers == 0); - parsec_list_item_ring_chop((parsec_list_item_t*)task->data[i].data_out); + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - assert( task->data[i].data_out->super.super.obj_reference_count == 1); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_out); - } + assert(task->data[i].data_out->super.super.obj_reference_count == 1); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + } PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, task->data[i].data_out->readers, - 
task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - - + "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + assert(task->data[i].data_out != NULL); - assert(original->device_copies[dealer_device->super.device_index]!= NULL); + assert(original->device_copies[dealer_device->super.device_index] != NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); assert(task->data[i].data_out->device_private != NULL); - assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); - - parsec_atomic_unlock( &original->lock ); - + assert(task->data[i].data_out->device_index == dealer_device->super.device_index); + parsec_atomic_unlock(&original->lock); } - else + else { - assert( task->data[i].data_in != NULL); - if( (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && - (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version) ) + assert(task->data[i].data_in != NULL); + if ((task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && + (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version)) { - parsec_data_t* original = task->data[i].data_out->original; + parsec_data_t *original = task->data[i].data_out->original; assert(original->device_copies[0] != NULL); assert(original->device_copies[original->owner_device] != NULL); - - parsec_atomic_lock( &original->lock ); - + + 
parsec_atomic_lock(&original->lock); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in + * us to easily find the stage_in data as the possible candidate in * parsec_gpu_data_stage_in() function. */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - assert( task->data[i].data_out->device_index == dealer_device->super.device_index ); + assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] possiible candidate from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, task->data[i].data_out->readers, - task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + "Migrate: data %p attached to original %p [readers %d, ref_count %d] possiible candidate from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - parsec_atomic_unlock( &original->lock ); + parsec_atomic_unlock(&original->lock); } } } @@ -620,29 +595,27 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t return 0; } - -int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device) +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device) { int i; parsec_task_t *task = gpu_task->ec; - for(i = 0; i < task->task_class->nb_flows; i++) + for (i = 0; i < task->task_class->nb_flows; i++) { if (task->data[i].data_out == NULL) continue; - 
if(PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) //CTL flow + if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) // CTL flow continue; - if( (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) - && (gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH) ) + if ((PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags) && (gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH)) { - parsec_gpu_data_copy_t* gpu_elem = task->data[i].data_out; - gpu_elem->version++; /* on to the next version */ + parsec_gpu_data_copy_t *gpu_elem = task->data[i].data_out; + gpu_elem->version++; /* on to the next version */ PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "GPU[%s]: GPU copy %p [ref_count %d] increments version to %d at %s:%d", - gpu_device->super.name, - gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->version, - __FILE__, __LINE__); + "GPU[%s]: GPU copy %p [ref_count %d] increments version to %d at %s:%d", + gpu_device->super.name, + gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->version, + __FILE__, __LINE__); } } @@ -655,7 +628,7 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) task_mapping_item_t *item; key = task->task_class->make_key(task->taskpool, task->locals); - if( NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key)) ) + if (NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key))) { item = (task_mapping_item_t *)malloc(sizeof(task_mapping_item_t)); @@ -666,7 +639,7 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) parsec_hash_table_unlock_bucket(task_mapping_ht, key); } else - item->ht_item.key = key; + item->ht_item.key = key; } int find_task_to_device_mapping(parsec_task_t *task) @@ -675,10 +648,10 @@ int find_task_to_device_mapping(parsec_task_t *task) task_mapping_item_t *item; key = task->task_class->make_key(task->taskpool, task->locals); - if( NULL == (item 
= parsec_hash_table_nolock_find(task_mapping_ht, key)) ) + if (NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key))) return -1; - - return item->device_index; + + return item->device_index; } void clear_task_migrated_per_tp() @@ -696,14 +669,3 @@ int get_tp_count() { return tp_count; } - - - - - - - - - - - diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index ba34ff3bc..70df9d5cc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -1,7 +1,6 @@ #ifndef PARSEC_DEVICE_CUDA_MIGRATE_H #define PARSEC_DEVICE_CUDA_MIGRATE_H - #include "parsec/parsec_config.h" #include "parsec/parsec_internal.h" #include "parsec/utils/zone_malloc.h" @@ -14,21 +13,21 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) -#define TASK_NOT_MIGRATED 0 +#define TASK_NOT_MIGRATED 0 #define TASK_MIGRATED_BEFORE_STAGE_IN 1 #define TASK_MIGRATED_AFTER_STAGE_IN 2 /** - * @brief + * @brief * level 0 - task has been enqueued to the pending queue of the device. It has not been progressed. - * level 1 - task has been dequeued from the pending queue of the device and it has been moved to + * level 1 - task has been dequeued from the pending queue of the device and it has been moved to * the queue that deals with movement of the task data to the GPU, but has not yet been moved * level 2 - task data has been moved to the GPU, GPU is in control of the data and Task. 
- * + * */ #define EXECUTION_LEVEL 3 -typedef struct parsec_device_cuda_info_s +typedef struct parsec_device_cuda_info_s { int total_tasks_executed; int task_count[EXECUTION_LEVEL]; @@ -41,29 +40,24 @@ typedef struct parsec_device_cuda_info_s typedef struct migrated_task_s { - parsec_list_item_t list_item; - parsec_gpu_task_t* gpu_task; - parsec_device_gpu_module_t* dealer_device; - parsec_device_gpu_module_t* starving_device; + parsec_list_item_t list_item; + parsec_gpu_task_t *gpu_task; + parsec_device_gpu_module_t *dealer_device; + parsec_device_gpu_module_t *starving_device; int stage_in_status; } migrated_task_t; typedef struct task_mapping_item_s { - parsec_hash_table_item_t ht_item; - int device_index; + parsec_hash_table_item_t ht_item; + int device_index; } task_mapping_item_t; -//typedef struct gpu_dev_prof_s -//{ -// int32_t device_index; -// int32_t task_count; -//} gpu_dev_prof_t; -//static const char *gpu_dev_prof_info_str = "int32_t{device_index};int32_t{task_count}"; - -//static int parsec_gpu_task_count_start; -//static int parsec_gpu_task_count_end; - +typedef struct gpu_dev_prof_s +{ + int32_t device_index; + int32_t task_count; +} gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); @@ -74,21 +68,21 @@ int parsec_cuda_set_device_task(int device, int task_count, int level); int parsec_cuda_tasks_executed(int device); int is_starving(int device); int find_starving_device(int dealer_device); -parsec_device_gpu_module_t* parsec_cuda_change_device( int dealer_device_index); -int parsec_cuda_mig_task_enqueue( parsec_execution_stream_t *es, migrated_task_t *mig_task); -int parsec_cuda_mig_task_dequeue( parsec_execution_stream_t *es); -int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device, - parsec_gpu_task_t* migrated_gpu_task); -int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t* dealer_device); -int 
parsec_gpu_data_reserve_device_space_for_flow( parsec_device_gpu_module_t* gpu_device, - parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); -int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* src_dev, - parsec_device_gpu_module_t* dest_dev); -int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* dealer_device, - parsec_device_gpu_module_t* starving_device, int stage_in_status); -int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t* gpu_device); -int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t* data, int flow_index, - parsec_data_copy_t* src_copy, parsec_data_copy_t* dst_copy, +parsec_device_gpu_module_t *parsec_cuda_change_device(int dealer_device_index); +int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t *mig_task); +int parsec_cuda_mig_task_dequeue(parsec_execution_stream_t *es); +int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t *migrated_gpu_task); +int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device); +int parsec_gpu_data_reserve_device_space_for_flow(parsec_device_gpu_module_t *gpu_device, + parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); +int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *src_dev, + parsec_device_gpu_module_t *dest_dev); +int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, int stage_in_status); +int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); +int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t *data, int flow_index, + parsec_data_copy_t *src_copy, parsec_data_copy_t *dst_copy, uint8_t stage_in_device, uint8_t access_mode); double current_time(); int 
update_task_to_device_mapping(parsec_task_t *task, int device_index); @@ -96,11 +90,5 @@ int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); void print_task_migrated_per_tp(); int get_tp_count(); -static inline void gpu_dev_profiling_init(void); - #endif - - - - diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 27a2bb613..79a2589c5 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1326,7 +1326,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_device_gpu_module_t *gpu_device = &cuda_device->super; int32_t type = flow->flow_flags; parsec_data_copy_t* in_elem = task_data->data_in; - parsec_data_copy_t* release_after_data_in_is_attached = NULL; parsec_data_t* original = in_elem->original; parsec_gpu_data_copy_t* gpu_elem = task_data->data_out; uint32_t nb_elts = gpu_task->flow_nb_elts[flow->flow_index]; @@ -2771,13 +2770,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); - - #if defined(PARSEC_PROF_TRACE) - parsec_profiling_trace_flags(es->es_profile, - parsec_gpu_task_count_start, - (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), - gpu_device->super.device_index, &gpu_device->mutex, 0); - #endif return PARSEC_HOOK_RETURN_ASYNC; } PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Entering GPU management at %s:%d", @@ -2789,13 +2781,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, (unsigned long)es, PROFILE_OBJECT_ID_NULL, NULL ); #endif /* defined(PARSEC_PROF_TRACE) */ -#if defined(PARSEC_PROF_TRACE) - parsec_profiling_trace_flags(es->es_profile, - parsec_gpu_task_count_start, - 
(uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), - gpu_device->super.device_index, &gpu_device->mutex, 0); -#endif - status = cudaSetDevice( cuda_device->cuda_index ); PARSEC_CUDA_CHECK_ERROR( "(parsec_cuda_kernel_scheduler) cudaSetDevice ", status, {return PARSEC_HOOK_RETURN_DISABLE;} ); @@ -2807,6 +2792,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_device->super.name, parsec_gpu_describe_gpu_task(tmp, MAX_TASK_STRLEN, gpu_task), gpu_task->ec->priority ); + + #if defined(PARSEC_PROF_TRACE) + parsec_profiling_trace_flags(es->es_profile, + parsec_gpu_task_count_start, + (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), + gpu_task->ec->taskpool->taskpool_id, NULL, 0); + #endif } rc = progress_stream( gpu_device, @@ -2976,10 +2968,13 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_device->super.executed_tasks++; #if defined(PARSEC_PROF_TRACE) + gpu_dev_prof_t prof_info; + prof_info.device_index = gpu_device->super.device_index; + prof_info.task_count = gpu_device->mutex; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), - gpu_device->super.device_index, &gpu_device->mutex, 0); + gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); #endif /** From 85c839771acfa1b3334906d6a2acb98d97c4880c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 14 Jul 2022 17:10:50 -0400 Subject: [PATCH 121/215] parsec_get_best_device() updated --- parsec/mca/device/device.c | 122 +++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 297cfa094..aeb16257f 100644 --- 
a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -70,6 +70,7 @@ float *parsec_device_tweight = NULL; * execution context is not released). * -1 - if the kernel is scheduled to be executed on a GPU. */ +#if 0 int parsec_get_best_device( parsec_task_t* this_task, double ratio ) { @@ -180,14 +181,123 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } + //dev_index = 2; + return dev_index; +} +#endif - /** - * update task to device mapping. The code control reaches here only if there was no - * previous mapping available +static int parsec_device_load_balance_skew = 20; +static float load_balance_skew; + +int parsec_get_best_device( parsec_task_t* this_task, double ratio ) +{ + int i, dev_index = -1, data_index, prefer_index = -1; + parsec_taskpool_t* tp = this_task->taskpool; + + /* Select the location of the first data that is used in READ/WRITE or pick the + * location of one of the READ data. For now use the last one. */ - - //if(parsec_cuda_iterative) - // update_task_to_device_mapping(this_task, dev_index); + for( i = 0; i < this_task->task_class->nb_flows; i++ ) { + /* Make sure data_in is not NULL */ + if( NULL == this_task->data[i].data_in ) continue; + /* And that we have a data (aka it is not NEW) */ + if( NULL == this_task->data[i].source_repo_entry ) continue; + + /* Data is updated by the task, and we try to minimize the data movements */ + if( (NULL != this_task->task_class->out[i]) && + (this_task->task_class->out[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { + + data_index = this_task->task_class->out[i]->flow_index; + /* If the data has a preferred device, try to obey it. 
*/ + if( this_task->data[data_index].data_in->original->preferred_device > 1 ) { /* no CPU or recursive */ + dev_index = this_task->data[data_index].data_in->original->preferred_device; + break; + } + /* Data is located on a device */ + if( this_task->data[data_index].data_in->original->owner_device > 1 ) { /* no CPU or recursive */ + dev_index = this_task->data[data_index].data_in->original->owner_device; + break; + } + } + /* If we reach here, we cannot yet decide which device to run on based on the WRITE + * constraints, so let's pick the data for a READ flow. + */ + data_index = this_task->task_class->in[i]->flow_index; + if( this_task->data[data_index].data_in->original->preferred_device > 1 ) { + prefer_index = this_task->data[data_index].data_in->original->preferred_device; + } else if( this_task->data[data_index].data_in->original->owner_device > 1 ) { + prefer_index = this_task->data[data_index].data_in->original->owner_device; + } + } + + /* 0 is CPU, and 1 is recursive device */ + if( dev_index <= 1 ) { /* This is the first time we see this data for a GPU, let's decide which GPU will work on it. 
*/ + int best_index; + float weight, best_weight = parsec_device_load[0] + ratio * parsec_device_sweight[0]; + + /* Warn if there is no valid device for this task */ + for(best_index = 0; best_index < parsec_mca_device_enabled(); best_index++) { + parsec_device_module_t *dev = parsec_mca_device_get(best_index); + + /* Skip the device if it is not configured */ + if(!(tp->devices_index_mask & (1 << best_index))) continue; + /* Stop on this device if there is an incarnation for it */ + for(i = 0; NULL != this_task->task_class->incarnations[i].hook; i++) + if( (this_task->task_class->incarnations[i].type == dev->type) && (this_task->chore_mask & (1<<i)) ) + break; + if((NULL != this_task->task_class->incarnations[i].hook) && (this_task->chore_mask & (1 << i))) + break; + } + if(parsec_mca_device_enabled() == best_index) { + /* We tried all possible devices, and none of them have an implementation + * for this task! */ + parsec_warning("*** Task class '%s' has no valid implementation for the available devices", + this_task->task_class->name); + return -1; + } + + /* If we have a preferred device, start with it, but still consider + * other options to have some load balance */ + if( -1 != prefer_index ) { + best_index = prefer_index; + /* we still prefer this device, until it is twice as loaded as the + * real best load balance device */ + best_weight = load_balance_skew * (parsec_device_load[prefer_index] + ratio * parsec_device_sweight[prefer_index]); + } + + /* Consider how adding the current task would change load balancing + * betwen devices */ + /* Start at 2, to skip the recursive body */ + for( dev_index = 2; dev_index < parsec_mca_device_enabled(); dev_index++ ) { + /* Skip the device if it is not configured */ + if(!(tp->devices_index_mask & (1 << dev_index))) continue; + weight = parsec_device_load[dev_index] + ratio * parsec_device_sweight[dev_index]; + if( best_weight > weight ) { + best_index = dev_index; + best_weight = weight; + } + } + // Load problem: was nothing to do here +
parsec_device_load[best_index] += ratio * parsec_device_sweight[best_index]; + assert( best_index != 1 ); + dev_index = best_index; + } + + /* Sanity check: if at least one of the data copies is not parsec + * managed, check that all the non-parsec-managed data copies + * exist on the same device */ + for( i = 0; i < this_task->task_class->nb_flows; i++ ) { + /* Make sure data_in is not NULL */ + if (NULL == this_task->data[i].data_in) continue; + if ((this_task->data[i].data_in->flags & PARSEC_DATA_FLAG_PARSEC_MANAGED) == 0 && + this_task->data[i].data_in->device_index != dev_index) { + char task_str[MAX_TASK_STRLEN]; + parsec_fatal("*** User-Managed Copy Error: Task %s is selected to run on device %d,\n" + "*** but flow %d is represented by a data copy not managed by PaRSEC,\n" + "*** and does not have a copy on that device\n", + parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); + } + } return dev_index; } From 575d0b5678ef51ab44977f69cbfb64060639ddc9 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 14 Jul 2022 21:32:31 -0400 Subject: [PATCH 122/215] migrate workflow changed. The position of migration changed to just after task completion. 
--- parsec/mca/device/cuda/device_cuda_module.c | 67 +++++++++++++-------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 79a2589c5..0c951a8a5 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2885,30 +2885,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, out_task_pop = progress_task; fetch_task_from_shared_queue: - - //printf(" tp %d time %lf device %d level0 %d level1 %d level2 %d total %d \n", - // get_tp_count(), - // current_time(), - // CUDA_DEVICE_NUM(gpu_device->super.device_index), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); - - /** - * @brief Before a new task is selectd by the device manager for execution, - * the manager checks if there are any starving devices and migrate tasks, - * to the starving device, if there are available tasks to migrate. - * - * rc will return the total number of tasks selected for migration and that - * is deducted from the total number of tasks that will be executed by this - * GPU. 
- */ - if(parsec_cuda_migrate_tasks) - nb_migrated = migrate_to_starving_device(es, gpu_device); - if( nb_migrated > 0 ) - goto crappy_code; - + assert( NULL == gpu_task ); if (1 == parsec_cuda_sort_pending && out_task_submit == NULL && out_task_pop == NULL) { parsec_gpu_sort_pending_list(gpu_device); @@ -2999,7 +2976,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream,"GPU[%s]: gpu_task %p freed at %s:%d", gpu_device->super.name, gpu_task, __FILE__, __LINE__); free( gpu_task ); -crappy_code: rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); if( 1 == rc ) { /* I was the last one */ #if defined(PARSEC_PROF_TRACE) @@ -3013,6 +2989,47 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, return PARSEC_HOOK_RETURN_ASYNC; } gpu_task = progress_task; + + //printf(" tp %d time %lf device %d level0 %d level1 %d level2 %d total %d \n", + // get_tp_count(), + // current_time(), + // CUDA_DEVICE_NUM(gpu_device->super.device_index), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), + // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); + + /** + * @brief Before a new task is selectd by the device manager for execution, + * the manager checks if there are any starving devices and migrate tasks, + * to the starving device, if there are available tasks to migrate. + * + * rc will return the total number of tasks selected for migration and that + * is deducted from the total number of tasks that will be executed by this + * GPU. 
+ */ + if(parsec_cuda_migrate_tasks) + { + nb_migrated = migrate_to_starving_device(es, gpu_device); + if( nb_migrated > 0 ) + { + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); + if( 1 == rc ) + { /* I was the last one */ + #if defined(PARSEC_PROF_TRACE) + if( parsec_gpu_trackable_events & PARSEC_PROFILE_GPU_TRACK_OWN ) + PARSEC_PROFILING_TRACE( es->es_profile, parsec_gpu_own_GPU_key_end, + (unsigned long)es, PROFILE_OBJECT_ID_NULL, NULL ); + #endif /* defined(PARSEC_PROF_TRACE) */ + PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Leaving GPU management at %s:%d", + gpu_device->super.name, __FILE__, __LINE__); + + return PARSEC_HOOK_RETURN_ASYNC; + } + + } + } + goto fetch_task_from_shared_queue; disable_gpu: From 0792499b226c6a2f93435260f17736cf5ffffaed Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 14 Jul 2022 22:38:23 -0400 Subject: [PATCH 123/215] reader increment condition updated --- parsec/mca/device/cuda/device_cuda_module.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 0c951a8a5..4afa21a77 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1378,8 +1378,11 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); - PARSEC_DATA_COPY_INC_READERS_ATOMIC(in_elem); - undo_readers_inc_if_no_transfer = 1; + if(gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + PARSEC_DATA_COPY_INC_READERS_ATOMIC(in_elem); + undo_readers_inc_if_no_transfer = 1; + } goto src_selected; } From 0b8e287381e3e59e927d362495de42f761d2da44 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 15 Jul 2022 19:25:25 -0400 
Subject: [PATCH 124/215] extra condition added to make sure that the data_out is not pointing to a junk value --- parsec/mca/device/cuda/device_cuda_migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 01c9427df..16c268a0e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -561,7 +561,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t else { assert(task->data[i].data_in != NULL); - if ((task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && + + if ((task->data[i].data_in->original == task->data[i].data_out->original) && + (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version)) { parsec_data_t *original = task->data[i].data_out->original; From 27746610cb5c795e35550c9b360becd4d41fd117 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 11:47:46 -0400 Subject: [PATCH 125/215] refined task counting to count only gpu tasks of type PARSEC_GPU_TASK_TYPE_KERNEL. --- parsec/mca/device/cuda/device_cuda_module.c | 32 +++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 4afa21a77..70b98b012 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2124,19 +2124,23 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, * task first try to see if anything completed. 
*/ if( NULL != task ) { PARSEC_PUSH_TASK(stream->fifo_pending, (parsec_list_item_t*)task); - task = NULL; - if(stream == gpu_device->exec_stream[0]) - { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); - } - else if(stream != gpu_device->exec_stream[1] && stream != gpu_device->exec_stream[0]) + if (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); + if(stream == gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); + } + else if(stream != gpu_device->exec_stream[1] && stream != gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); + } } + task = NULL; + } *out_task = NULL; progress_fct = upstream_progress_fct; @@ -2769,7 +2773,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device + if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); @@ -2970,8 +2975,11 @@ parsec_cuda_kernel_scheduler( 
parsec_execution_stream_t *es, PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec); __parsec_schedule(es, gpu_task->ec, distance); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); - parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); + if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); + parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); + } remove_gpu_task: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; From 44ca2d9309665941b623166f1733ef5948de42d4 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 13:15:42 -0400 Subject: [PATCH 126/215] documentation updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 70 ++++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 16c268a0e..44d24d629 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -31,7 +31,7 @@ static void task_mapping_ht_free_elt(void *_item, void *table) static void gpu_dev_profiling_init() { - //const char *gpu_dev_prof_info_str = "exec_time{double};device_index{int32_t};task_count{int32_t}"; + // const char *gpu_dev_prof_info_str = "exec_time{double};device_index{int32_t};task_count{int32_t}"; parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), "device_index{int32_t};task_count{int32_t}", @@ -231,15 +231,24 @@ int parsec_cuda_tasks_executed(int device) * @param device index number of the device * @return int * - * TODO: needs updation */ int is_starving(int device) { + /** + * @brief The default number of execution stream in PaRSEC is 2. 
We assume + * starvtion if the number of ready tasks available is less than twice the + * number of execution stream. + */ return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; } int will_starve(int device) { + /** + * @brief The default number of execution stream in PaRSEC is 2. We assume + * starvtion if migrating a task will push the number of ready tasks available + * to less than twice the number of execution stream. + */ return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; } @@ -251,7 +260,6 @@ int will_starve(int device) * @param ndevice total number of devices * @return int * - * TODO: needs updation */ int find_starving_device(int dealer_device) { @@ -337,7 +345,7 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t /** * @brief check if there are any devices starving. If there are any starving device migrate - * half the task from the dealer device to the starving device. + * task from the dealer device to the starving device. * * @param es * @param dealer_gpu_device @@ -367,7 +375,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ */ migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 execution_level = 0; - if (migrated_gpu_task == NULL) + if (migrated_gpu_task == NULL) { migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 execution_level = 1; @@ -390,11 +398,10 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ if (migrated_gpu_task != NULL) { assert(migrated_gpu_task->ec != NULL); - // parsec_list_item_ring_chop( (parsec_list_item_t*)migrated_gpu_task ); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); /** * @brief if the task is a not a computational kerenel or if it is a task that has - * already been migrated, we stop the migration. 
+ * already been migrated, we stop the migration and push it back to the queue. */ if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { @@ -441,7 +448,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ * @brief An object of type migrated_task_t is created store the migrated task * and other associated details. This object is enqueued to a node level queue. * The main objective of this was to make sure that the manager does not have to sepend - * time on migration. It can select the task for migration, enqqueue it to the node level + * time on migration. It can select the task for migration, enqueue it to the node level * queue and then return to its normal working. */ mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); @@ -464,7 +471,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } migrated_gpu_task = NULL; - ///* update the expected load on the GPU device */ + /* update the expected load on the GPU device */ parsec_device_load[dealer_device->super.device_index] -= nb_migrated * parsec_device_sweight[dealer_device->super.device_index]; return nb_migrated; } @@ -504,8 +511,19 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_lock(&original->lock); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + /** + * we set a possible candidate for this flow of the task. This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. + */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + /** + * Even if the task has only read access, the data may have been modified + * by another task, and it may be 'dirty'. We check the version of the data + * to verify if it is dirty. If it is, then it is pushed to gpu_mem_owned_lru, + * if not is is pused to gpu_mem_lru. 
+ */ if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { @@ -519,6 +537,12 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } + /** + * If the task has only read.wtite access, the data may have been modified + * by another task, and it may be 'dirty'. We check the version of the data + * to verify if it is dirty. If it is, then it is pushed to gpu_mem_owned_lru, + * if not is is pused to gpu_mem_lru. + */ if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { @@ -532,6 +556,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } + /** + * If the task has a write only option, the taks should have written to it. But as we + * are migrating the task this write will never happen. So this data can be evicted + * immediatly. To ensure this eviction, we push the data to gpu_mem_lru. 
+ */ if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { @@ -558,12 +587,15 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_atomic_unlock(&original->lock); } - else + else //TASK_MIGRATED_BEFORE_STAGE_IN { assert(task->data[i].data_in != NULL); - - if ((task->data[i].data_in->original == task->data[i].data_out->original) && + + if (/* This condition is required as task->data[i].data_out may be poitining to a junk value*/ + (task->data[i].data_in->original == task->data[i].data_out->original) && + /* If its not the owner then the existing stage_in mechanism will take careof the rest*/ (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && + /* If dealer device does not have a new version then then the existing stage_in mechanism will take careof the rest*/ (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version)) { parsec_data_t *original = task->data[i].data_out->original; @@ -624,6 +656,13 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo return 0; } +/** + * @brief Associate a task with a particular device_index. + * + * @param task + * @param device_index + * @return int + */ int update_task_to_device_mapping(parsec_task_t *task, int device_index) { parsec_key_t key; @@ -644,6 +683,13 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) item->ht_item.key = key; } +/** + * @brief Check if the task has any particular task mapping, + * if it has return the device_index, or else return -1. + * + * @param task + * @return int + */ int find_task_to_device_mapping(parsec_task_t *task) { parsec_key_t key; From 948c2bbab9d8e9ba42e2ce6fcd376ef4f59cc003 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 13:43:03 -0400 Subject: [PATCH 127/215] mem LRU corrected. 
The data may have been modified by another task, in which case it is 'dirty'. We check the version of the data to verify whether it is dirty: if it is, it is pushed to gpu_mem_owned_lru; if not, it is pushed to gpu_mem_lru. --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 44d24d629..a70f28198 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -533,9 +533,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); + else + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** * If the task has only read.wtite access, the data may have been modified @@ -552,9 +552,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - else parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); + else + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** * If the task has a write only option, the taks should have written to it.
But as we From c8afe5d9c66ef12e9b592be4f8feb95d989e9bcf Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 17:30:48 -0400 Subject: [PATCH 128/215] if the migrated task is one whose data has already been staged in we will always use the posssible candidate we have identified. --- parsec/mca/device/cuda/device_cuda_module.c | 29 +++++++++------------ 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 70b98b012..ec1555486 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1345,8 +1345,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * If the data will be accessed in write mode, remove it from any GPU data management * lists until the task is completed. */ - if( PARSEC_FLOW_ACCESS_WRITE & type ) { - if (gpu_elem->readers > 0 ) { + if(PARSEC_FLOW_ACCESS_WRITE & type) + { + if (gpu_elem->readers > 0 ) + { if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " "(possible anti-dependency,\n" @@ -1371,19 +1373,18 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, || (gpu_task->migrate_status > TASK_NOT_MIGRATED) /* make sure limitation does not affect migrated tasks */) { int potential_alt_src = 0; - if( PARSEC_DEV_CUDA == in_elem_dev->super.super.type ) { - if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { + if( (PARSEC_DEV_CUDA == in_elem_dev->super.super.type) && + /* if the migrated task is one whose data has already been staged in we will always + use the posssible candidate we have identified*/ + (gpu_task->migrate_status != TASK_MIGRATED_AFTER_STAGE_IN)) + { + if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) + { /* We can directly do D2D, so let's skip the selection */ - 
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); - if(gpu_task->migrate_status > TASK_NOT_MIGRATED) - { - PARSEC_DATA_COPY_INC_READERS_ATOMIC(in_elem); - undo_readers_inc_if_no_transfer = 1; - } - goto src_selected; } } @@ -1480,12 +1481,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_OBJ_RELEASE(task_data->data_in); } - /** - * Why do we need this increment. For some reason, if this increment is not - * done the reader goes to 0, when the number of CUDA device is greater than 2. - */ - //if(parsec_cuda_migrate_tasks) - // PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; From b1a22147d5ba5ffbffce7dc75ad18ef0a4aaa86c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 18:48:14 -0400 Subject: [PATCH 129/215] Anti depdendency like behaviour may happen during task migration. Extra condition added to sidestep this case. 
--- parsec/mca/device/cuda/device_cuda_module.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index ec1555486..02b01c391 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1349,7 +1349,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { if (gpu_elem->readers > 0 ) { - if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { + if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) && + /* Anti depdendency like behaviour may happen during task migration */ + (gpu_task->migrate_status > TASK_NOT_MIGRATED) ) { parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " "(possible anti-dependency,\n" "or concurrent accesses), please prevent that with CTL dependencies\n", From 9df54a73a6f0eef4f0b04aa1a4abd05ec4e2a39c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 16 Jul 2022 19:44:50 -0400 Subject: [PATCH 130/215] parsec_data_copy_detach() updated. Lock removed as it was creating deadlocks in some instances. For instance, when detach was being called from parsec_gpu_data_reserve_device_space(). 
--- parsec/data.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/parsec/data.c b/parsec/data.c index 52a1ff8e7..56667792e 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -205,8 +205,6 @@ int parsec_data_copy_detach(parsec_data_t* data, */ if( data->owner_device == device) { - parsec_atomic_lock( &data->lock ); - for( i = 0; i < parsec_nb_devices; i++ ) { if( i == device) continue; @@ -219,29 +217,25 @@ int parsec_data_copy_detach(parsec_data_t* data, data->owner_device = data->device_copies[i]->device_index; new_owner_copy = data->device_copies[i]; + break; } - parsec_atomic_unlock( &data->lock ); - if( (new_owner_copy == NULL) && (younger_version == -1) ) { PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "DEV[%d]: parsec_data_copy_detach failed to identify new owner (last copy): original %p device_copy %p", device, data, copy); - data->owner_device = -1; } if( (new_owner_copy == NULL) && (device > 1) && (younger_version > -1) ) { PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "DEV[%d]: parsec_data_copy_detach failed to identify new owner (younger version exists in device %d): original %p device_copy %p", device, younger_version, data, copy); - //assert(0); } if( new_owner_copy != NULL ) PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, "DEV[%d]: identified new owner for original %p : device_copy %p on device_index %d (old owner was copy %p on device_index %d)", device, data, new_owner_copy, new_owner_copy->device_index, copy, copy->device_index); - } parsec_data_copy_t* obj = data->device_copies[device]; From 81f82499399391be979acb8937d0b80152ec8042 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Jul 2022 09:53:04 -0400 Subject: [PATCH 131/215] unnecessary locks removed --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index a70f28198..9f53c4d2b 100644 --- 
a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -508,7 +508,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if (stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN) { parsec_data_t *original = task->data[i].data_out->original; - parsec_atomic_lock(&original->lock); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** @@ -584,8 +583,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out->device_index == dealer_device->super.device_index); assert(task->data[i].data_out->device_private != NULL); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - - parsec_atomic_unlock(&original->lock); } else //TASK_MIGRATED_BEFORE_STAGE_IN { @@ -603,8 +600,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[0] != NULL); assert(original->device_copies[original->owner_device] != NULL); - parsec_atomic_lock(&original->lock); - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** * we set a possible candidate for this flow of the task. 
This will allow @@ -621,7 +616,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - parsec_atomic_unlock(&original->lock); } } } From a16f91f3a4169eb9ae33a20fa9b4283c6f013273 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Jul 2022 11:32:49 -0400 Subject: [PATCH 132/215] use a round robin method to find starving device --- parsec/mca/device/cuda/device_cuda_migrate.c | 18 +++++++++++++----- parsec/mca/device/cuda/device_cuda_migrate.h | 2 ++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9f53c4d2b..c471d599b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -72,6 +72,8 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].level2 = 0; device_info[i].total_tasks_executed = 0; device_info[i].received = 0; + device_info[i].last_device = i; + device_info[i].iam_starving = 1; } #if defined(PARSEC_HAVE_CUDA) @@ -177,7 +179,6 @@ int parsec_cuda_get_device_load(int device) int parsec_cuda_set_device_load(int device, int load) { int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); - return rc + load; } /** @@ -263,14 +264,20 @@ int will_starve(int device) */ int find_starving_device(int dealer_device) { - int i; + int i = 0; + int starving_device = 0; + int next_device = ( (device_info[dealer_device].last_device) + 1) / NDEVICES; + int final_device = next_device + NDEVICES; - for (i = 0; i < NDEVICES; i++) + // use a round robin method to find starving device + for(i = next_device; i < final_device; i++) { - if (i == dealer_device) + starving_device = i % NDEVICES; + + if (starving_device == dealer_device) continue; - if (is_starving(i)) + if (is_starving(starving_device)) 
return i; } @@ -463,6 +470,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); + device_info[dealer_device_index].last_device = starving_device_index; char tmp[MAX_TASK_STRLEN]; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 70df9d5cc..7f1513f49 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -36,6 +36,8 @@ typedef struct parsec_device_cuda_info_s int level1; int level2; int received; + int last_device; + int iam_starving; } parsec_device_cuda_info_t; typedef struct migrated_task_s From 515eda2efee2515348782cf69226cf0bb794b073 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Jul 2022 14:57:45 -0400 Subject: [PATCH 133/215] debug streams updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c471d599b..1966792a2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -579,7 +579,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, @@ -618,7 
+618,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] possiible candidate from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, From b3b47ae50a03484dad8ca24179cd9ffba7764172 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Jul 2022 18:12:31 -0400 Subject: [PATCH 134/215] find_starving_device() corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1966792a2..dcea25803 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -266,7 +266,7 @@ int find_starving_device(int dealer_device) { int i = 0; int starving_device = 0; - int next_device = ( (device_info[dealer_device].last_device) + 1) / NDEVICES; + int next_device = ( (device_info[dealer_device].last_device) + 1) % NDEVICES; int final_device = next_device + NDEVICES; // use a round robin method to find starving device @@ -278,7 +278,7 @@ int find_starving_device(int dealer_device) continue; if (is_starving(starving_device)) - return i; + return starving_device; } return -1; From b059e45776f1b953b992bdfdcc95f4d01d010c52 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 18 Jul 2022 21:35:28 -0400 Subject: [PATCH 135/215] A 'chunk' of tasks migrated to the starving device, instead of just one task. 
--- .../mca/device/cuda/device_cuda_component.c | 4 + parsec/mca/device/cuda/device_cuda_migrate.c | 189 ++++++++++-------- parsec/mca/device/cuda/device_cuda_module.c | 5 +- 3 files changed, 109 insertions(+), 89 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index c1e4bdd22..750be42bb 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -42,6 +42,7 @@ int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_me char* parsec_cuda_lib_path = NULL; int parsec_cuda_migrate_tasks = 0; int parsec_cuda_iterative = 0; +int parsec_cuda_migrate_chunk_size = 0; static int cuda_mask, cuda_nvlink_mask; @@ -205,6 +206,9 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "iterative", "Boolean to let the GPU know the workload is iterative", false, false, 0, &parsec_cuda_iterative); + (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_chunk_size", + "Integer to let the GPU know the number of tasks to be migrated in a single go", + false, false, 5, &parsec_cuda_migrate_chunk_size); #if defined(PARSEC_PROF_TRACE) (void)parsec_mca_param_reg_int_name("device_cuda", "one_profiling_stream_per_cuda_stream", diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index dcea25803..e12d09945 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,6 +1,8 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" extern int parsec_device_cuda_enabled; +extern int parsec_cuda_migrate_chunk_size; + parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; static int NDEVICES; @@ -362,7 +364,7 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t int migrate_to_starving_device(parsec_execution_stream_t *es, 
parsec_device_gpu_module_t *dealer_device) { int starving_device_index = -1, dealer_device_index = 0; - int nb_migrated = 0, execution_level = 0, stream_index = 0, j = 0, k = 0; + int nb_migrated = 0, execution_level = 0, stream_index = 0, i = 0, j = 0, k = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; migrated_task_t *mig_task = NULL; @@ -376,107 +378,120 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ return 0; starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - /** - * @brief Tasks are searched in different levels one by one. At this point we assume - * that the cost of migration increases, as the level increase. - */ - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 - execution_level = 0; - if (migrated_gpu_task == NULL) + for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 - execution_level = 1; - + /** + * @brief Tasks are searched in different levels one by one. At this point we assume + * that the cost of migration increases, as the level increase. 
+ */ + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 + execution_level = 0; if (migrated_gpu_task == NULL) { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + execution_level = 1; + + if (migrated_gpu_task == NULL) { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 - if (migrated_gpu_task != NULL) + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) { - execution_level = 2; - stream_index = 2 + j; - break; + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + if (migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } } } } - } - if (migrated_gpu_task != NULL) - { - assert(migrated_gpu_task->ec != NULL); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); - /** - * @brief if the task is a not a computational kerenel or if it is a task that has - * already been migrated, we stop the migration and push it back to the queue. 
- */ - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + if (migrated_gpu_task != NULL) { - if (execution_level == 0) - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); - if (execution_level == 1) - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - if (execution_level == 2) - parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - - return nb_migrated; - } - - assert((migrated_gpu_task != NULL) && (migrated_gpu_task->ec != NULL)); + assert(migrated_gpu_task->ec != NULL); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + /** + * @brief if the task is a not a computational kerenel or if it is a task that has + * already been migrated, we stop the migration and push it back to the queue. + */ + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + if (execution_level == 0) + { + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + else if (execution_level == 1) + { + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + else if (execution_level == 2) + { + parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + } - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) - { - 
parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; - } - nb_migrated++; - parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); + if( migrated_gpu_task != NULL ) //make sure the task was not returned to the queue + { - /** - * @brief change migrate_status according to the status of the stage in of the - * stage_in data. - */ - if (execution_level == 2) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; - else - migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + if (execution_level == 0) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; + } + if (execution_level == 1) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; + } + if (execution_level == 2) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; + } + nb_migrated++; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - /** - * @brief An object of type migrated_task_t is created store the migrated task - * and other associated details. This object is enqueued to a node level queue. - * The main objective of this was to make sure that the manager does not have to sepend - * time on migration. It can select the task for migration, enqueue it to the node level - * queue and then return to its normal working. 
- */ - mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); - PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); - mig_task->gpu_task = migrated_gpu_task; - // memset(migrated_gpu_task->posssible_candidate, -1, sizeof(int32_t)); - for (k = 0; k < MAX_PARAM_COUNT; k++) - migrated_gpu_task->posssible_candidate[k] = -1; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = starving_device; - mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. + */ + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; - parsec_cuda_mig_task_enqueue(es, mig_task); - device_info[dealer_device_index].last_device = starving_device_index; + /** + * @brief An object of type migrated_task_t is created store the migrated task + * and other associated details. This object is enqueued to a node level queue. + * The main objective of this was to make sure that the manager does not have to sepend + * time on migration. It can select the task for migration, enqueue it to the node level + * queue and then return to its normal working. + */ + mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + mig_task->gpu_task = migrated_gpu_task; + for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->posssible_candidate[k] = -1; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + + parsec_cuda_mig_task_enqueue(es, mig_task); + device_info[dealer_device_index].last_device = starving_device_index; + + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); + } + } - char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), - execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); - } + if (will_starve(dealer_device_index)) + break; + } //end for migrated_gpu_task = NULL; /* update the expected load on the GPU device */ diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 02b01c391..1ec5a4603 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -50,6 +50,7 @@ static int parsec_cuda_flush_lru( parsec_device_module_t *device ); extern int parsec_cuda_migrate_tasks; extern int parsec_cuda_iterative; +extern int parsec_cuda_migrate_chunk_size; extern int parsec_gpu_task_count_start; extern int parsec_gpu_task_count_end; @@ -3021,8 +3022,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, nb_migrated = migrate_to_starving_device(es, gpu_device); if( nb_migrated > 0 ) { - rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); - if( 1 == rc ) + rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); + if( 0 == gpu_device->mutex ) { 
/* I was the last one */ #if defined(PARSEC_PROF_TRACE) if( parsec_gpu_trackable_events & PARSEC_PROFILE_GPU_TRACK_OWN ) From b3eb162544f476553af057cc94f00de928a6f64b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 19 Jul 2022 09:31:16 -0400 Subject: [PATCH 136/215] parse all available device looking for starving devices. --- parsec/mca/device/cuda/device_cuda_migrate.c | 242 ++++++++++--------- 1 file changed, 129 insertions(+), 113 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e12d09945..8f3b40b7e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -268,11 +268,11 @@ int find_starving_device(int dealer_device) { int i = 0; int starving_device = 0; - int next_device = ( (device_info[dealer_device].last_device) + 1) % NDEVICES; + int next_device = ((device_info[dealer_device].last_device) + 1) % NDEVICES; int final_device = next_device + NDEVICES; // use a round robin method to find starving device - for(i = next_device; i < final_device; i++) + for (i = next_device; i < final_device; i++) { starving_device = i % NDEVICES; @@ -364,7 +364,8 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device) { int starving_device_index = -1, dealer_device_index = 0; - int nb_migrated = 0, execution_level = 0, stream_index = 0, i = 0, j = 0, k = 0; + int nb_migrated = 0, execution_level = 0, stream_index = 0; + int i = 0, j = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; migrated_task_t *mig_task = NULL; @@ -373,125 +374,141 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ if (will_starve(dealer_device_index)) return 0; - starving_device_index = find_starving_device(dealer_device_index); - if 
(starving_device_index == -1) - return 0; - starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - - for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) + // parse all available device looking for starving devices. + int d_first = (device_info[dealer_device_index].last_device + 1 ) % NDEVICES; + for (d = d_first; d < (d_first + NDEVICES); d++) { - /** - * @brief Tasks are searched in different levels one by one. At this point we assume - * that the cost of migration increases, as the level increase. - */ - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 - execution_level = 0; - if (migrated_gpu_task == NULL) - { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 - execution_level = 1; - if (migrated_gpu_task == NULL) - { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) - { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 - if (migrated_gpu_task != NULL) - { - execution_level = 2; - stream_index = 2 + j; - break; - } - } - } - } + //starving_device_index = find_starving_device(dealer_device_index); + //if (starving_device_index == -1) + // return 0; + //starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); + + starving_device_index = d % NDEVICES; + if (d == dealer_device_index || !(is_starving(starving_device_index)) ) + continue; + + starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - if (migrated_gpu_task != NULL) + for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { - assert(migrated_gpu_task->ec != NULL); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); /** - * @brief if the task is a not a computational kerenel or if it is a task that has - * already been 
migrated, we stop the migration and push it back to the queue. + * @brief Tasks are searched in different levels one by one. At this point we assume + * that the cost of migration increases, as the level increase. */ - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 + execution_level = 0; + if (migrated_gpu_task == NULL) { - if (execution_level == 0) - { - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; - } - else if (execution_level == 1) - { - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; - } - else if (execution_level == 2) + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + execution_level = 1; + + if (migrated_gpu_task == NULL) { - parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + if (migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } + } } } - if( migrated_gpu_task != NULL ) //make sure the task was not returned to the queue + if (migrated_gpu_task != NULL) { - - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) + 
assert(migrated_gpu_task->ec != NULL); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + /** + * @brief if the task is a not a computational kerenel or if it is a task that has + * already been migrated, we stop the migration and push it back to the queue. + */ + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; + if (execution_level == 0) + { + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + else if (execution_level == 1) + { + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + else if (execution_level == 2) + { + parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } } - nb_migrated++; - parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - /** - * @brief change migrate_status according to the status of the stage in of the - * stage_in data. - */ - if (execution_level == 2) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; - else - migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + if (migrated_gpu_task != NULL) // make sure the task was not returned to the queue + { - /** - * @brief An object of type migrated_task_t is created store the migrated task - * and other associated details. This object is enqueued to a node level queue. - * The main objective of this was to make sure that the manager does not have to sepend - * time on migration. It can select the task for migration, enqueue it to the node level - * queue and then return to its normal working. 
- */ - mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); - PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); - mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->posssible_candidate[k] = -1; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = starving_device; - mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); - - parsec_cuda_mig_task_enqueue(es, mig_task); - device_info[dealer_device_index].last_device = starving_device_index; - - char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), - execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); + if (execution_level == 0) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; + } + if (execution_level == 1) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; + } + if (execution_level == 2) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; + } + nb_migrated++; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); + + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. + */ + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + + /** + * @brief An object of type migrated_task_t is created store the migrated task + * and other associated details. 
This object is enqueued to a node level queue. + * The main objective of this was to make sure that the manager does not have to sepend + * time on migration. It can select the task for migration, enqueue it to the node level + * queue and then return to its normal working. + */ + mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + mig_task->gpu_task = migrated_gpu_task; + for (k = 0; k < MAX_PARAM_COUNT; k++) + migrated_gpu_task->posssible_candidate[k] = -1; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + + parsec_cuda_mig_task_enqueue(es, mig_task); + device_info[dealer_device_index].last_device = starving_device_index; + + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); + } } - } + + if (will_starve(dealer_device_index)) + break; + } // end for i if (will_starve(dealer_device_index)) - break; - } //end for + break; + } // end for d migrated_gpu_task = NULL; /* update the expected load on the GPU device */ @@ -607,7 +624,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out->device_private != NULL); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); } - else //TASK_MIGRATED_BEFORE_STAGE_IN + else // TASK_MIGRATED_BEFORE_STAGE_IN { assert(task->data[i].data_in != NULL); @@ -638,7 +655,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t 
task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - } } } @@ -675,10 +691,10 @@ int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_mo /** * @brief Associate a task with a particular device_index. - * - * @param task - * @param device_index - * @return int + * + * @param task + * @param device_index + * @return int */ int update_task_to_device_mapping(parsec_task_t *task, int device_index) { @@ -702,10 +718,10 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) /** * @brief Check if the task has any particular task mapping, - * if it has return the device_index, or else return -1. - * - * @param task - * @return int + * if it has return the device_index, or else return -1. + * + * @param task + * @return int */ int find_task_to_device_mapping(parsec_task_t *task) { From b6270a3d04f0a4089326cce2929a0f8ce56fad95 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 19 Jul 2022 12:52:49 -0400 Subject: [PATCH 137/215] migration condition updated and the assert removed. Assert conflicts with migration work flows. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_module.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 8f3b40b7e..d51248fbc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -633,7 +633,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t /* If its not the owner then the existing stage_in mechanism will take careof the rest*/ (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && /* If dealer device does not have a new version then then the existing stage_in mechanism will take careof the rest*/ - (task->data[i].data_out->version != task->data[i].data_out->original->device_copies[0]->version)) + (task->data[i].data_out->version > task->data[i].data_out->original->device_copies[0]->version)) { parsec_data_t *original = task->data[i].data_out->original; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 1ec5a4603..b5fcbc787 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -847,7 +847,7 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - assert(0); + //assert(0); } #endif return PARSEC_SUCCESS; @@ -1352,7 +1352,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) && /* Anti depdendency like behaviour may happen during task migration */ - (gpu_task->migrate_status > TASK_NOT_MIGRATED) ) { + (gpu_task->migrate_status > TASK_NOT_MIGRATED) ) + { parsec_warning("GPU[%s]:\tWrite access to data copy %p 
[ref_count %d] with existing readers [%d] " "(possible anti-dependency,\n" "or concurrent accesses), please prevent that with CTL dependencies\n", @@ -1639,8 +1640,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.nb_data_faults += nb_elts; ///* update the data version in GPU immediately, and mark the data under transfer */ - assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) - || (gpu_task->migrate_status > TASK_NOT_MIGRATED)); + //assert((gpu_elem->version != in_elem->version) || (gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_NOT_TRANSFER) + // || (gpu_task->migrate_status > TASK_NOT_MIGRATED)); gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] gets the same version %d as copy %p [ref_count %d] at %s:%d", From 34673e00d75eef1fb817688d43f6f752090bfb58 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 20 Jul 2022 19:22:18 -0400 Subject: [PATCH 138/215] For tasks migrated after stage_in, during the first stage_in we would have increased the refcount of the data_in. If the task was not migrated, then the generated code would have decremented this refcount after the task was executed. But now as we have migrated the task, this decrement will not happen. So we remember the first data_in. This remembered data_in is RELEASED after the task is completed. 
--- parsec/mca/device/cuda/device_cuda_module.c | 72 ++++++++++++++------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index b5fcbc787..1a82649bd 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1388,6 +1388,18 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); + + /** + * @brief For tasks migrated after stage_ in, during the first stage_in + * we would have increased the refcount of the data_in. If the task was not + * migrated, then the generated code would have decremented this refcount after + * the task was executed. But now as we have migrated the task, this decrement + * will not happen. So we remeber the the first data_in and the remembered + * data_in will me RELEASED after task completion. + */ + + if( gpu_task->original_data_in[ flow->flow_index ] == NULL) + gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; goto src_selected; } @@ -1423,19 +1435,14 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); /** - * If we change the data_in of a task, we should RELEASE it only after the task - * is complete. Or else it may cause problem for special cases in PTG, when calling - * the data reshape associated with iterate successor (For instance this happens when - * NEW is called for a data created solely in the GPU). To mitigate this problem we - * save the original data_in to be released in the end. 
+ * @brief We have remembered the data_in of the first stage_in. If the current + * data_in is not same as the first stage_in RELEASE it, as we will not be using + * it. */ - if( gpu_task->original_data_in[ flow->flow_index ] == NULL) - gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; - else - { - if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - PARSEC_OBJ_RELEASE(task_data->data_in); - } + if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) + && (gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) ) + PARSEC_OBJ_RELEASE(task_data->data_in); + task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1476,14 +1483,18 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, undo_readers_inc_if_no_transfer = 1; /* We swap data_in with candidate, so we update the reference counters */ PARSEC_OBJ_RETAIN(candidate); + + /** + * @brief For tasks migrated after stage_ in, during the first stage_in + * we would have increased the refcount of the data_in. If the task was not + * migrated, then the generated code would have decremented this refcount after + * the task was executed. But now as we have migrated the task, this decrement + * will not happen. So we remeber the the first data_in and the remembered + * data_in will me RELEASED after task completion. 
+ */ if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; - else - { - if( gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - PARSEC_OBJ_RELEASE(task_data->data_in); - } task_data->data_in = candidate; in_elem = candidate; @@ -1533,9 +1544,17 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } /* Do not need to be tranferred */ - if( -1 == transfer_from ) { + if( -1 == transfer_from ) + { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - } else { + + if( undo_readers_inc_if_no_transfer ) + { + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); + } + } + else + { /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; @@ -1655,12 +1674,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_atomic_unlock( &original->lock ); return 1; } - - if( undo_readers_inc_if_no_transfer ) - { - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); - } - assert( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); + assert( transfer_from == -1 || gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); @@ -2941,6 +2955,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, goto remove_gpu_task; } + /** + * @brief For tasks migrated after stage_ in, during the first stage_in + * we would have increased the refcount of the data_in. If the task was not + * migrated, then the generated code would have decremented this refcount after + * the task was executed. But now as we have migrated the task, this decrement + * will not happen. Here we RELEASE the remembered data_in of the task. 
+ */ + int f = 0; for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) { From 2a8010aeac3fb9142f72aa1cdece3b59fc8a147a Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 21 Jul 2022 12:05:56 -0400 Subject: [PATCH 139/215] minor code updation --- parsec/mca/device/cuda/device_cuda_migrate.c | 11 ++---- parsec/mca/device/cuda/device_cuda_module.c | 39 ++++++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d51248fbc..3437f5d35 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -549,6 +549,10 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { parsec_data_t *original = task->data[i].data_out->original; + assert(original->device_copies[dealer_device->super.device_index] != NULL); + assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); + assert(task->data[i].data_out->device_index == dealer_device->super.device_index); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** * we set a possible candidate for this flow of the task. 
This will allow @@ -616,13 +620,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - - assert(task->data[i].data_out != NULL); - assert(original->device_copies[dealer_device->super.device_index] != NULL); - assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); - assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - assert(task->data[i].data_out->device_private != NULL); - assert(task->data[i].data_out->device_index == dealer_device->super.device_index); } else // TASK_MIGRATED_BEFORE_STAGE_IN { diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 1a82649bd..a6e2efd12 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1421,28 +1421,30 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ) { - /** - * if the data was already staged_in then we would have already incremented - * the reader for it. - */ - if(gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); - undo_readers_inc_if_no_transfer = 1; - - PARSEC_OBJ_RETAIN(candidate); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 2) to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); /** * @brief We have remembered the data_in of the first stage_in. 
If the current - * data_in is not same as the first stage_in RELEASE it, as we will not be using - * it. + * data_in is not (will not) be the same as the first stage_in, so RELEASE it, + * as we will not be using it. */ - if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) - && (gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) ) - PARSEC_OBJ_RELEASE(task_data->data_in); + if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) ) + { + if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + PARSEC_OBJ_RELEASE(task_data->data_in); + } + /** + * if the data was already staged_in then we would have already incremented + * the reader for it. + */ + if(gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + undo_readers_inc_if_no_transfer = 1; + /* We swap data_in with candidate, so we update the reference counters */ + PARSEC_OBJ_RETAIN(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; @@ -1479,11 +1481,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate (case 3) to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); - undo_readers_inc_if_no_transfer = 1; - /* We swap data_in with candidate, so we update the reference counters */ - PARSEC_OBJ_RETAIN(candidate); - /** * @brief For tasks migrated after stage_ in, during the first stage_in * we would have increased the refcount of the data_in. 
If the task was not @@ -1496,6 +1493,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); + undo_readers_inc_if_no_transfer = 1; + /* We swap data_in with candidate, so we update the reference counters */ + PARSEC_OBJ_RETAIN(candidate); task_data->data_in = candidate; in_elem = candidate; in_elem_dev = target; From 6801f58a45bd65554920e777019586f80a93b980 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 21 Jul 2022 14:20:18 -0400 Subject: [PATCH 140/215] asserts added --- parsec/mca/device/cuda/device_cuda_module.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index a6e2efd12..459093d5d 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1390,7 +1390,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); /** - * @brief For tasks migrated after stage_ in, during the first stage_in + * @brief For tasks migrated after stage_in, during the first stage_in * we would have increased the refcount of the data_in. If the task was not * migrated, then the generated code would have decremented this refcount after * the task was executed. 
But now as we have migrated the task, this decrement @@ -1400,6 +1400,15 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + else + { + /** + * @brief gpu_task->original_data_in[ flow->flow_index ] should not be + * NULL only for tasks of type TASK_MIGRATED_AFTER_STAGE_IN + */ + + assert(0); + } goto src_selected; } @@ -1492,6 +1501,15 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + else + { + /** + * @brief gpu_task->original_data_in[ flow->flow_index ] should not be + * NULL only for tasks of type TASK_MIGRATED_AFTER_STAGE_IN. For the first + * stage_in it should never be anything other than NULL. + */ + assert(0); + } PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; From ed39230fd3855c9919c117f902ac0c6a66004118 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 21 Jul 2022 15:27:07 -0400 Subject: [PATCH 141/215] LRU updated for write only access. --- parsec/mca/device/cuda/device_cuda_migrate.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3437f5d35..99f45ceec 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -581,7 +581,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** - * If the task has only read.wtite access, the data may have been modified + * If the task has only read-write access, the data may have been modified * by another task, and it may be 'dirty'. 
We check the version of the data * to verify if it is dirty. If it is, then it is pushed to gpu_mem_owned_lru, * if not is is pused to gpu_mem_lru. @@ -600,19 +600,20 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** - * If the task has a write only option, the taks should have written to it. But as we - * are migrating the task this write will never happen. So this data can be evicted - * immediatly. To ensure this eviction, we push the data to gpu_mem_lru. + * If the task has a write only option, so the readers will be be 0. If we push it to + * gpu_mem_lru it may get evicted before we can usee it as a possible candidate for the + * next stage_in. So we push it to gpu_mem_owned_lru. */ if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers == 0); + assert(task->data[i].data_out->super.super.obj_reference_count == 1); parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - assert(task->data[i].data_out->super.super.obj_reference_count == 1); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, From e54ccf402c287ad390ed06a85f504388b2bb01ba Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 21 Jul 2022 17:23:15 -0400 Subject: [PATCH 142/215] cosmetic changes --- parsec/mca/device/cuda/device_cuda_migrate.c | 90 ++++++-------------- parsec/mca/device/cuda/device_cuda_migrate.h | 3 - 2 files changed, 24 insertions(+), 69 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 
99f45ceec..b6202c672 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,12 +1,12 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" extern int parsec_device_cuda_enabled; -extern int parsec_cuda_migrate_chunk_size; +extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) parsec_device_cuda_info_t *device_info; -static parsec_list_t *migrated_task_list; -static int NDEVICES; -static parsec_hash_table_t *task_mapping_ht = NULL; +static parsec_list_t *migrated_task_list; // list of all migrated task +static int NDEVICES; // total number of GPUs +static parsec_hash_table_t *task_mapping_ht = NULL; // hashtable for storing task mapping static int task_migrated_per_tp = 0; static int tp_count; @@ -33,7 +33,6 @@ static void task_mapping_ht_free_elt(void *_item, void *table) static void gpu_dev_profiling_init() { - // const char *gpu_dev_prof_info_str = "exec_time{double};device_index{int32_t};task_count{int32_t}"; parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), "device_index{int32_t};task_count{int32_t}", @@ -75,15 +74,11 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].total_tasks_executed = 0; device_info[i].received = 0; device_info[i].last_device = i; - device_info[i].iam_starving = 1; } -#if defined(PARSEC_HAVE_CUDA) - nvml_ret = nvmlInit_v2(); -#endif - task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); parsec_hash_table_init(task_mapping_ht, offsetof(task_mapping_item_t, ht_item), 16, task_mapping_table_generic_key_fn, NULL); + #if defined(PARSEC_PROF_TRACE) gpu_dev_profiling_init(); #endif @@ -141,48 +136,6 @@ double current_time() return (MPI_Wtime() - start); } -/** - * @brief returns the load of a particular device - * - * nvml_utilization has two fields - gpu and memory - * gpu - Percent of time over the past sample period during which one or more kernels was executing on the GPU. 
- * memory - Percent of time over the past sample period during which global (device) memory was being read or written - * - * @param device index of the device - * @return int - */ - -int parsec_cuda_get_device_load(int device) -{ -#if defined(PARSEC_HAVE_CUDA) - nvmlDevice_t nvml_device; - nvmlUtilization_t nvml_utilization; - nvmlReturn_t nvml_ret; - - nvmlDeviceGetHandleByIndex_v2(device, &nvml_device); - nvml_ret = nvmlDeviceGetUtilizationRates(nvml_device, &nvml_utilization); - device_info[device].load = nvml_utilization.gpu; - -// printf("NVML Device Load GPU %d Memory %d \n", nvml_utilization.gpu, nvml_utilization.memory); -#else - device_info[device].load = device_info[device].task_count; -#endif /* PARSEC_HAVE_CUDA */ - - return device_info[device].load; -} - -/** - * @brief sets the load of a particular device - * - * @param device index of the device - * @return int - */ - -int parsec_cuda_set_device_load(int device, int load) -{ - int rc = parsec_atomic_fetch_add_int32(&(device_info[device].load), load); -} - /** * @brief returns the number of tasks in a particular device * @@ -216,7 +169,7 @@ int parsec_cuda_set_device_task(int device, int task_count, int level) } /** - * @brief sets the load of a particular device + * @brief Incerement the total task executed by a device * * @param device index of the device * @return int @@ -375,19 +328,13 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ return 0; // parse all available device looking for starving devices. 
- int d_first = (device_info[dealer_device_index].last_device + 1 ) % NDEVICES; + int d_first = (device_info[dealer_device_index].last_device + 1) % NDEVICES; for (d = d_first; d < (d_first + NDEVICES); d++) { - - //starving_device_index = find_starving_device(dealer_device_index); - //if (starving_device_index == -1) - // return 0; - //starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); - starving_device_index = d % NDEVICES; - if (d == dealer_device_index || !(is_starving(starving_device_index)) ) + if (d == dealer_device_index || !(is_starving(starving_device_index))) continue; - + starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) @@ -396,10 +343,13 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ * @brief Tasks are searched in different levels one by one. At this point we assume * that the cost of migration increases, as the level increase. */ + + // level 0 - task is just pushed to the device queue migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 execution_level = 0; if (migrated_gpu_task == NULL) { + // level1 - task is aavailble in the stage_in queue. Stage_in not started. 
migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 execution_level = 1; @@ -407,6 +357,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ { for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) { + // level2 - task is available in one of the execution queue stage_in is complete migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 if (migrated_gpu_task != NULL) { @@ -484,15 +435,17 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ */ mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + mig_task->gpu_task = migrated_gpu_task; for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->posssible_candidate[k] = -1; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); + device_info[dealer_device_index].last_device = starving_device_index; char tmp[MAX_TASK_STRLEN]; @@ -507,8 +460,8 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } // end for i if (will_starve(dealer_device_index)) - break; - } // end for d + break; + } // end for d migrated_gpu_task = NULL; /* update the expected load on the GPU device */ @@ -612,7 +565,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } @@ -700,6 +653,11 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) task_mapping_item_t *item; key = task->task_class->make_key(task->taskpool, task->locals); + + /** + * @brief Entry NULL imples that this task has never been migrated + * till now in any of the iteration. So we start a new entry. 
+ */ if (NULL == (item = parsec_hash_table_nolock_find(task_mapping_ht, key))) { diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 7f1513f49..e0fcb665a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -37,7 +37,6 @@ typedef struct parsec_device_cuda_info_s int level2; int received; int last_device; - int iam_starving; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -63,8 +62,6 @@ typedef struct gpu_dev_prof_s int parsec_cuda_migrate_init(int ndevices); int parsec_cuda_migrate_fini(); -int parsec_cuda_get_device_load(int device); -int parsec_cuda_set_device_load(int device, int load); int parsec_cuda_get_device_task(int device, int level); int parsec_cuda_set_device_task(int device, int task_count, int level); int parsec_cuda_tasks_executed(int device); From 3eb899a17a357eb8b2954b3c2c9a19e01ea187ef Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 22 Jul 2022 10:24:36 -0400 Subject: [PATCH 143/215] cosmetic changes --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++++++ parsec/mca/device/cuda/device_cuda_module.c | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b6202c672..afb5c5858 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -532,6 +532,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + + //why not just? 
+ //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** * If the task has only read-write access, the data may have been modified @@ -551,6 +554,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + + //why not just? + //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** * If the task has a write only option, so the readers will be be 0. If we push it to diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 459093d5d..70e86b902 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -816,7 +816,7 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de int i, ref_count; if( gpu_copy != NULL) { - parsec_warning("parsec_cuda_memory_release_list: Release copy %p original %d readers %d ref_count %d. The copy should have been NULL by this point!! (%s:%d)", + parsec_warning("parsec_cuda_memory_release_list: Release copy %p original %p readers %d ref_count %d. The copy should have been NULL by this point!! 
(%s:%d)", gpu_copy, gpu_copy->original, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, __FILE__, __LINE__); //ref_count = gpu_copy->super.super.obj_reference_count; @@ -971,7 +971,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[i]); if( NULL == gpu_elem->device_private ) { #endif - find_another_data: /* Look for a data_copy to free */ lru_gpu_elem = (parsec_gpu_data_copy_t*)parsec_list_pop_front(&gpu_device->gpu_mem_lru); @@ -1020,6 +1019,12 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, * always remove the data from the LRU. */ if( 0 != lru_gpu_elem->readers ) { + + parsec_warning("GPU[%s]:%s: Drop LRU-retrieved CUDA copy: data_copy %p [ original %p readers %d refcount %d ]", + gpu_device->super.name, task_name, + lru_gpu_elem, lru_gpu_elem->original, lru_gpu_elem->readers, + lru_gpu_elem->super.super.obj_reference_count); + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:%s: Drop LRU-retrieved CUDA copy %p [readers %d, ref_count %d] original %p", gpu_device->super.name, task_name, From be42c19ea93d41e4772bd7790c9b94a5fbde3d97 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 22 Jul 2022 15:36:16 -0400 Subject: [PATCH 144/215] additional debug statement added --- parsec/mca/device/cuda/device_cuda_module.c | 4 ++++ parsec/utils/zone_malloc.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 70e86b902..cae6e05ad 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2864,6 +2864,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } /* If we can extract data go for it, otherwise try to drain the pending tasks */ gpu_task = parsec_gpu_create_w2r_task(gpu_device, es); + + PARSEC_DEBUG_VERBOSE(10, 
parsec_gpu_output_stream, "Creating parsec_gpu_create_w2r_task on GPU[%s] dev_index %d", + gpu_device->super.name, gpu_device->super.device_index); + if( NULL != gpu_task ) goto get_data_out_of_device; } diff --git a/parsec/utils/zone_malloc.c b/parsec/utils/zone_malloc.c index fc6d6f238..595b35265 100644 --- a/parsec/utils/zone_malloc.c +++ b/parsec/utils/zone_malloc.c @@ -185,6 +185,10 @@ size_t zone_in_use(zone_malloc_t *gdata) current_tid += current_segment->nb_units) { if( current_segment->status == SEGMENT_FULL ) { ret += gdata->unit_size * current_segment->nb_units; + + PARSEC_DEBUG_VERBOSE(10, parsec_debug_output, + "Zone in use for device memory %p segment %p unit_size %d nb_units %d", + gdata, gdata->segments[current_tid], gdata->unit_size, current_segment->nb_units); } } return ret; From 279d0ff100f54778e3285b9d5f157f9020f19a87 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 22 Jul 2022 17:20:42 -0400 Subject: [PATCH 145/215] If the flow is write only, we free the data immediately as this data should never be written back. As the data_in of a write only flow is always the CPU copy we revert to the original stage_in mechanism for write only flows. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 41 +++++++++++++------- parsec/mca/device/cuda/device_cuda_module.c | 9 ----- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index afb5c5858..df8803546 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -501,18 +501,13 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if (stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN) { parsec_data_t *original = task->data[i].data_out->original; + parsec_atomic_lock( &original->lock ); assert(original->device_copies[dealer_device->super.device_index] != NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. - */ - gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; /** * Even if the task has only read access, the data may have been modified @@ -525,6 +520,13 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { assert(task->data[i].data_out->readers > 0); + /** + * we set a possible candidate for this flow of the task. This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. 
+ */ + gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); @@ -547,6 +549,13 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { assert(task->data[i].data_out->readers > 0); + /** + * we set a possible candidate for this flow of the task. This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. + */ + gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); @@ -554,27 +563,33 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - - //why not just? - //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } + /** - * If the task has a write only option, so the readers will be be 0. If we push it to - * gpu_mem_lru it may get evicted before we can usee it as a possible candidate for the - * next stage_in. So we push it to gpu_mem_owned_lru. + * If the flow is write only, we free the data immediatly as this data should never + * be written back. As the data_in of a write only flow is always CPU copy we revert + * to the original stage_in mechanism for write only flows. 
*/ if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers == 0); assert(task->data[i].data_out->super.super.obj_reference_count == 1); + assert(original->device_copies[0] != NULL); + assert(task->data[i].data_in == original->device_copies[0]); parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); + parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); + PARSEC_OBJ_RELEASE(task->data[i].data_out); + zone_free( gpu_device->memory, (void*)(task->data[i].data_out->device_private) ); + } + parsec_atomic_unlock( &original->lock ); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index cae6e05ad..08c31672c 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1506,15 +1506,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; - else - { - /** - * @brief gpu_task->original_data_in[ flow->flow_index ] should not be - * NULL only for tasks of type TASK_MIGRATED_AFTER_STAGE_IN. For the first - * stage_in it should never be anything other than NULL. 
- */ - assert(0); - } PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; From f0c2c56dbe720ab0453c4b5c6b721a604ae147c3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 22 Jul 2022 17:58:44 -0400 Subject: [PATCH 146/215] If the data of the task has not been staged in we can revert to the existing stage in mechanism, as all the data it needs is pointed by data_in and data_out has not been allocated in any of the GPU as we have not started the stage_in process yet. --- parsec/mca/device/cuda/device_cuda_migrate.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index df8803546..890c946a7 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -548,6 +548,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); + assert(original->device_copies[0] != NULL); + assert(task->data[i].data_in == original->device_copies[0]); /** * we set a possible candidate for this flow of the task. This will allow @@ -596,8 +598,17 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); } + else // TASK_MIGRATED_BEFORE_STAGE_IN { + /** + * @brief If the data of the task has not been staged in we can revert to + * the existing stage in mechanism, as all the data it needs is pointed + * by data_in and data_out has not been allocated in any of the GPU as we have + * not started the stage_in process yet. 
+ */ + + #if 0 assert(task->data[i].data_in != NULL); if (/* This condition is required as task->data[i].data_out may be poitining to a junk value*/ @@ -628,6 +639,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); } + + #endif } } From 10b891acd0a243ece48bf9a997bc53418a9b0e17 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 23 Jul 2022 18:57:32 -0400 Subject: [PATCH 147/215] stage_in of migrated task simplified --- parsec/mca/device/cuda/device_cuda_migrate.c | 92 +++--------- parsec/mca/device/cuda/device_cuda_module.c | 145 ++++++++----------- 2 files changed, 74 insertions(+), 163 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 890c946a7..4951265b2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -487,26 +487,25 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_copy_t *src_copy = NULL; char tmp[128]; - for (i = 0; i < task->task_class->nb_flows; i++) + /** + * Data is already staged in the dealer device and we can find all the data + * of the tasks to be migrated in the dealer device. + */ + if (stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN) { - if (task->data[i].data_out == NULL) - continue; - if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) // CTL flow - continue; - /** - * Data is already staged in the dealer device and we can find all the data - * of the tasks to be migrated in the dealer device. 
- */ - if (stage_in_status == TASK_MIGRATED_AFTER_STAGE_IN) + for (i = 0; i < task->task_class->nb_flows; i++) { - parsec_data_t *original = task->data[i].data_out->original; - parsec_atomic_lock( &original->lock ); + if (task->data[i].data_out == NULL) + continue; + if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) // CTL flow + continue; + parsec_data_t *original = task->data[i].data_out->original; + parsec_atomic_lock(&original->lock); assert(original->device_copies[dealer_device->super.device_index] != NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** @@ -519,24 +518,18 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); - /** * we set a possible candidate for this flow of the task. This will allow * us to easily find the stage_in data as the possible candidate in * parsec_gpu_data_stage_in() function. */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - - //why not just? 
- //parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } /** * If the task has only read-write access, the data may have been modified @@ -550,25 +543,21 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(task->data[i].data_out->readers > 0); assert(original->device_copies[0] != NULL); assert(task->data[i].data_in == original->device_copies[0]); - /** * we set a possible candidate for this flow of the task. This will allow * us to easily find the stage_in data as the possible candidate in * parsec_gpu_data_stage_in() function. */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } - /** - * If the flow is write only, we free the data immediatly as this data should never + * If the flow is write only, we free the data immediatly as this data should never * be written back. As the data_in of a write only flow is always CPU copy we revert * to the original stage_in mechanism for write only flows. 
*/ @@ -582,66 +571,19 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); - PARSEC_OBJ_RELEASE(task->data[i].data_out); - zone_free( gpu_device->memory, (void*)(task->data[i].data_out->device_private) ); - + PARSEC_OBJ_RELEASE(task->data[i].data_out); + zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); } - - parsec_atomic_unlock( &original->lock ); - + parsec_atomic_unlock(&original->lock); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); } - - else // TASK_MIGRATED_BEFORE_STAGE_IN - { - /** - * @brief If the data of the task has not been staged in we can revert to - * the existing stage in mechanism, as all the data it needs is pointed - * by data_in and data_out has not been allocated in any of the GPU as we have - * not started the stage_in process yet. 
- */ - - #if 0 - assert(task->data[i].data_in != NULL); - - if (/* This condition is required as task->data[i].data_out may be poitining to a junk value*/ - (task->data[i].data_in->original == task->data[i].data_out->original) && - /* If its not the owner then the existing stage_in mechanism will take careof the rest*/ - (task->data[i].data_out->original->owner_device == dealer_device->super.device_index) && - /* If dealer device does not have a new version then then the existing stage_in mechanism will take careof the rest*/ - (task->data[i].data_out->version > task->data[i].data_out->original->device_copies[0]->version)) - { - parsec_data_t *original = task->data[i].data_out->original; - - assert(original->device_copies[0] != NULL); - assert(original->device_copies[original->owner_device] != NULL); - - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. 
- */ - gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; - - assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] possiible candidate from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, task->data[i].data_out->readers, - task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - } - - #endif - } } return 0; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 08c31672c..8e49c0764 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1334,10 +1334,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_copy_t* in_elem = task_data->data_in; parsec_data_t* original = in_elem->original; parsec_gpu_data_copy_t* gpu_elem = task_data->data_out; + parsec_device_cuda_module_t *in_elem_dev = NULL; uint32_t nb_elts = gpu_task->flow_nb_elts[flow->flow_index]; int transfer_from = -1; int undo_readers_inc_if_no_transfer = 0; - char tmp[128]; if( gpu_task->task_type == PARSEC_GPU_TASK_TYPE_PREFETCH ) { PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream, @@ -1355,9 +1355,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { if (gpu_elem->readers > 0 ) { - if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) && - /* Anti depdendency like behaviour may happen during task migration */ - (gpu_task->migrate_status > TASK_NOT_MIGRATED) ) + if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " "(possible anti-dependency,\n" @@ -1375,17 +1373,62 @@ parsec_gpu_data_stage_in( 
parsec_device_cuda_module_t* cuda_device, PARSEC_LIST_ITEM_SINGLETON(gpu_elem); } + /** + * @brief Detect if this was a migrated task and if we have already identified + * a possible candidate as the source of stage_in + */ + if( (gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) + && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) + { + int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; + /** + * A possible candidate is set when we call change_task_features() during migration + * preparation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater + * than 1, it means that we have already identifies a staged_in data as the possible + * candidate. So we can directly use that data for D2D transfer. + */ + parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; + parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); + + assert(PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 2) to Device to Device copy", + gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); + + /** + * @brief We have remembered the original data_in of the first stage_in. But + * instead of the original data_in the first stage_in could have used another + * candidate as the source stage_in. So we decrement the refcount of the + * first stage_in candidate. + */ + if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) ) + { + if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + PARSEC_OBJ_RELEASE(task_data->data_in); + } + + /** + * if the data was already staged_in then we would have already incremented + * the reader for it. 
+ */ + undo_readers_inc_if_no_transfer = 1; + /* We swap data_in with candidate, so we update the reference counters */ + PARSEC_OBJ_RETAIN(candidate); + task_data->data_in = candidate; + in_elem = candidate; + in_elem_dev = target; + + goto src_selected; + + } + /* Detect if we can do a device to device copy. * Current limitations: only for read-only data used read-only on the hosting GPU. */ - parsec_device_cuda_module_t *in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); - if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type)) - || (gpu_task->migrate_status > TASK_NOT_MIGRATED) /* make sure limitation does not affect migrated tasks */) + in_elem_dev = (parsec_device_cuda_module_t*)parsec_mca_device_get( in_elem->device_index ); + if( ((PARSEC_FLOW_ACCESS_READ & type) && !(PARSEC_FLOW_ACCESS_WRITE & type))) { int potential_alt_src = 0; - if( (PARSEC_DEV_CUDA == in_elem_dev->super.super.type) && - /* if the migrated task is one whose data has already been staged in we will always - use the posssible candidate we have identified*/ - (gpu_task->migrate_status != TASK_MIGRATED_AFTER_STAGE_IN)) + if(PARSEC_DEV_CUDA == in_elem_dev->super.super.type) { if( gpu_device->peer_access_mask & (1 << in_elem_dev->cuda_index) ) { @@ -1393,81 +1436,15 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); - - /** - * @brief For tasks migrated after stage_in, during the first stage_in - * we would have increased the refcount of the data_in. If the task was not - * migrated, then the generated code would have decremented this refcount after - * the task was executed. 
But now as we have migrated the task, this decrement - * will not happen. So we remeber the the first data_in and the remembered - * data_in will me RELEASED after task completion. - */ - + + // Remember the original data_in. if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; - else - { - /** - * @brief gpu_task->original_data_in[ flow->flow_index ] should not be - * NULL only for tasks of type TASK_MIGRATED_AFTER_STAGE_IN - */ - - assert(0); - } goto src_selected; } } - // if the task is a migrated task and the possible candidate has already been identified - if( (gpu_task->migrate_status > TASK_NOT_MIGRATED) - && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) - { - int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; - /** - * A possible candidate is set when we call change_task_features() during migration - * preperation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater - * tha 1, it means that we have already identifies a staged_in data as the possible - * candidate. So we can directly use that data for D2D ytransfer. - */ - parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; - parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); - - if( PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ) - { - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 2) to Device to Device copy", - gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); - - /** - * @brief We have remembered the data_in of the first stage_in. If the current - * data_in is not (will not) be the same as the first stage_in, so RELEASE it, - * as we will not be using it. 
- */ - if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) ) - { - if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - PARSEC_OBJ_RELEASE(task_data->data_in); - } - - /** - * if the data was already staged_in then we would have already incremented - * the reader for it. - */ - if(gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) - PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); - undo_readers_inc_if_no_transfer = 1; - /* We swap data_in with candidate, so we update the reference counters */ - PARSEC_OBJ_RETAIN(candidate); - task_data->data_in = candidate; - in_elem = candidate; - in_elem_dev = target; - - goto src_selected; - - } - } - for(int t = 1; t < (int)parsec_nb_devices; t++) { parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(t); if( PARSEC_DEV_CUDA != target->super.super.type ) continue; @@ -1495,15 +1472,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate (case 3) to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - /** - * @brief For tasks migrated after stage_ in, during the first stage_in - * we would have increased the refcount of the data_in. If the task was not - * migrated, then the generated code would have decremented this refcount after - * the task was executed. But now as we have migrated the task, this decrement - * will not happen. So we remeber the the first data_in and the remembered - * data_in will me RELEASED after task completion. - */ - + // Remember the original data_in. 
if( gpu_task->original_data_in[ flow->flow_index ] == NULL) gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; From 414c03ffc78ba93fad596dde02111f0c2dcd5a20 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 24 Jul 2022 16:18:51 -0400 Subject: [PATCH 148/215] cosmetic changes --- parsec/mca/device/device.c | 115 ------------------------------------- 1 file changed, 115 deletions(-) diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index aeb16257f..7ac902422 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -70,121 +70,6 @@ float *parsec_device_tweight = NULL; * execution context is not released). * -1 - if the kernel is scheduled to be executed on a GPU. */ -#if 0 - -int parsec_get_best_device( parsec_task_t* this_task, double ratio ) -{ - int i, dev_index = -1, data_index; - parsec_taskpool_t* tp = this_task->taskpool; - - if(parsec_cuda_iterative) - { - // if task to device mapping is already available use that - dev_index = find_task_to_device_mapping(this_task); - if(dev_index != -1) - return dev_index; - } - - - /* Select the location of the first data that is used in READ/WRITE or pick the - * location of one of the READ data. For now use the last one. - */ - for( i = 0; i < this_task->task_class->nb_flows; i++ ) { - /* Make sure data_in is not NULL */ - if( NULL == this_task->data[i].data_in ) continue; - /* And that we have a data (aka it is not NEW) */ - if( NULL == this_task->data[i].source_repo_entry ) continue; - - /* Data is updated by the task, and we try to minimize the data movements */ - if( (NULL != this_task->task_class->out[i]) && - (this_task->task_class->out[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { - - data_index = this_task->task_class->out[i]->flow_index; - /* If the data has a preferred device, try to obey it. 
*/ - if( this_task->data[data_index].data_in->original->preferred_device > 1 ) { /* no CPU or recursive */ - dev_index = this_task->data[data_index].data_in->original->preferred_device; - break; - } - /* Data is located on a device */ - if( this_task->data[data_index].data_in->original->owner_device > 1 ) { /* no CPU or recursive */ - dev_index = this_task->data[data_index].data_in->original->owner_device; - break; - } - /* If we reach here, we cannot yet decide which device to run on based on the WRITE - * constraints, so let's pick the data for a READ flow. - */ - } - data_index = this_task->task_class->in[i]->flow_index; - if( this_task->data[data_index].data_in->original->preferred_device > 1 ) { - dev_index = this_task->data[data_index].data_in->original->preferred_device; - } else if( this_task->data[data_index].data_in->original->owner_device > 1 ) { - dev_index = this_task->data[data_index].data_in->original->owner_device; - } - } - - /* 0 is CPU, and 1 is recursive device */ - if( dev_index <= 1 ) { /* This is the first time we see this data for a GPU. - * Let's decide which GPU will work on it. 
*/ - int best_index; - float weight, best_weight = parsec_device_load[0] + ratio * parsec_device_sweight[0]; - - /* Start with a valid device for this task */ - for(best_index = 0; best_index < parsec_mca_device_enabled(); best_index++) { - parsec_device_module_t *dev = parsec_mca_device_get(best_index); - - /* Skip the device if it is not configured */ - if(!(tp->devices_index_mask & (1 << best_index))) continue; - /* Stop on this device if there is an incarnation for it */ - for(i = 0; NULL != this_task->task_class->incarnations[i].hook; i++) - if( (this_task->task_class->incarnations[i].type == dev->type) && (this_task->chore_mask & (1<task_class->incarnations[i].hook) && (this_task->chore_mask & (1 << i))) - break; - } - - if(parsec_mca_device_enabled() == best_index) { - /* We tried all possible devices, and none of them have an implementation - * for this task! */ - parsec_warning("*** Task class '%s' has no valid implementation for the available devices", - this_task->task_class->name); - return -1; - } - - /* Start at 2, to skip the recursive body */ - for( dev_index = 2; dev_index < parsec_mca_device_enabled(); dev_index++ ) { - /* Skip the device if it is not configured */ - if(!(tp->devices_index_mask & (1 << dev_index))) continue; - weight = parsec_device_load[dev_index] + ratio * parsec_device_sweight[dev_index]; - if( best_weight > weight ) { - best_index = dev_index; - best_weight = weight; - } - } - // Load problem: was nothing to do here - parsec_device_load[best_index] += ratio * parsec_device_sweight[best_index]; - assert( best_index != 1 ); - dev_index = best_index; - } - - /* Sanity check: if at least one of the data copies is not parsec - * managed, check that all the non-parsec-managed data copies - * exist on the same device */ - for( i = 0; i < this_task->task_class->nb_flows; i++ ) { - /* Make sure data_in is not NULL */ - if (NULL == this_task->data[i].data_in) continue; - if ((this_task->data[i].data_in->flags & 
PARSEC_DATA_FLAG_PARSEC_MANAGED) == 0 && - this_task->data[i].data_in->device_index != dev_index) { - char task_str[MAX_TASK_STRLEN]; - parsec_fatal("*** User-Managed Copy Error: Task %s is selected to run on device %d,\n" - "*** but flow %d is represented by a data copy not managed by PaRSEC,\n" - "*** and does not have a copy on that device\n", - parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); - } - } - //dev_index = 2; - return dev_index; -} -#endif static int parsec_device_load_balance_skew = 20; static float load_balance_skew; From bc2edc56dd945330ff332bd4594d8cf2ed09b381 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 24 Jul 2022 16:25:19 -0400 Subject: [PATCH 149/215] cosmetic changes --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 4951265b2..e4e4f9783 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -503,9 +503,11 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t *original = task->data[i].data_out->original; parsec_atomic_lock(&original->lock); + assert(original->device_copies[dealer_device->super.device_index] != NULL); assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; /** @@ -524,8 +526,10 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * parsec_gpu_data_stage_in() function. 
*/ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else @@ -549,8 +553,10 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * parsec_gpu_data_stage_in() function. */ gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); else @@ -577,7 +583,9 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_OBJ_RELEASE(task->data[i].data_out); zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); } + parsec_atomic_unlock(&original->lock); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, From 966ecfb987dfc3b472ec6f5c1661a8c1053e1c3b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 24 Jul 2022 20:35:02 -0400 Subject: [PATCH 150/215] data copy directly used as the candidate, instead of using the device index of the candidate. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 7 +++---- parsec/mca/device/cuda/device_cuda_module.c | 21 +++++++++----------- parsec/mca/device/device_gpu.h | 1 + 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e4e4f9783..1cf00da33 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -437,8 +437,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) - migrated_gpu_task->posssible_candidate[k] = -1; + for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; @@ -525,7 +524,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * us to easily find the stage_in data as the possible candidate in * parsec_gpu_data_stage_in() function. */ - gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + gpu_task->candidate[i] = task->data[i].data_out; parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); @@ -552,7 +551,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * us to easily find the stage_in data as the possible candidate in * parsec_gpu_data_stage_in() function. 
*/ - gpu_task->posssible_candidate[i] = task->data[i].data_out->device_index; + gpu_task->candidate[i] = task->data[i].data_out; parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 8e49c0764..26f80a4db 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1378,17 +1378,16 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * a possible candidate as the source of stage_in */ if( (gpu_task->migrate_status == TASK_MIGRATED_AFTER_STAGE_IN) - && (gpu_task->posssible_candidate[flow->flow_index] > 1 ) ) + && (gpu_task->candidate[flow->flow_index] != NULL ) ) { - int possible_device_copy_index = gpu_task->posssible_candidate[flow->flow_index]; /** * A possible candidate is set when we call change_task_features() during migration - * preparation of a task. gpu_task->posssible_candidate[flow->flow_index] is greater - * than 1, it means that we have already identifies a staged_in data as the possible + * preparation of a task. If gpu_task->candidate[flow->flow_index] is not NULL, + * it means that we have already identifies a staged_in data as the possible * candidate. So we can directly use that data for D2D transfer. 
*/ - parsec_data_copy_t *candidate = original->device_copies[possible_device_copy_index]; - parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(possible_device_copy_index); + parsec_data_copy_t *candidate = gpu_task->candidate[flow->flow_index]; + parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(candidate->device_index); assert(PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, @@ -1401,12 +1400,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * candidate as the source stage_in. So we decrement the refcount of the * first stage_in candidate. */ - if( (gpu_task->original_data_in[ flow->flow_index ] != NULL) ) - { - if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - PARSEC_OBJ_RELEASE(task_data->data_in); - } - + + if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + PARSEC_OBJ_RELEASE(task_data->data_in); + /** * if the data was already staged_in then we would have already incremented * the reader for it. diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 0f316d054..566ea113b 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -86,6 +86,7 @@ struct parsec_gpu_task_s { parsec_stage_out_function_t *stage_out; int migrate_status; int32_t posssible_candidate[MAX_PARAM_COUNT]; + parsec_data_copy_t* candidate[MAX_PARAM_COUNT]; parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; int32_t data_retained; #if defined(PARSEC_PROF_TRACE) From 90368b510be0189be25bf8fd24de85a6b88cf191 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 25 Jul 2022 10:18:03 -0400 Subject: [PATCH 151/215] Coherency status updated based on the LRU the data is pushed. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1cf00da33..e63360b1c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -507,8 +507,6 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - /** * Even if the task has only read access, the data may have been modified * by another task, and it may be 'dirty'. We check the version of the data @@ -530,9 +528,15 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + { parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; + } else + { parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + } } /** * If the task has only read-write access, the data may have been modified @@ -557,9 +561,15 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + { parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); + task->data[i].data_out->coherency_state 
= PARSEC_DATA_COHERENCY_OWNED; + } else + { parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + } } /** * If the flow is write only, we free the data immediatly as this data should never From 3a8a7442323fc6e1bed53a3c81cbd9c12ca8290f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 25 Jul 2022 11:03:42 -0400 Subject: [PATCH 152/215] parsec warning disabled --- parsec/mca/device/cuda/device_cuda_module.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 26f80a4db..d442683ee 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1020,11 +1020,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, */ if( 0 != lru_gpu_elem->readers ) { - parsec_warning("GPU[%s]:%s: Drop LRU-retrieved CUDA copy: data_copy %p [ original %p readers %d refcount %d ]", - gpu_device->super.name, task_name, - lru_gpu_elem, lru_gpu_elem->original, lru_gpu_elem->readers, - lru_gpu_elem->super.super.obj_reference_count); - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:%s: Drop LRU-retrieved CUDA copy %p [readers %d, ref_count %d] original %p", gpu_device->super.name, task_name, From 102e1b8fb3e495f66042fb0fe376b49f6ca636fb Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 27 Jul 2022 17:07:42 -0400 Subject: [PATCH 153/215] ensure that the coherency is changed before we push to a particular LRU. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index e63360b1c..c0ca045ce 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -529,13 +529,13 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) { - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } else { - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } } /** @@ -562,13 +562,13 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) { - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; + parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); } else { - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); } } /** From 3f46bf5a9f3a405e7d488cb94aa1ee907290df26 Mon Sep 17 00:00:00 
2001 From: Joseph John Date: Mon, 25 Jul 2022 16:19:47 -0400 Subject: [PATCH 154/215] trace cost of migration --- parsec/mca/device/cuda/device_cuda_migrate.c | 7 ++++--- parsec/mca/device/cuda/device_cuda_migrate.h | 10 ++++++++++ parsec/mca/device/cuda/device_cuda_module.c | 21 ++++++++++++++++++++ parsec/mca/device/device_gpu.h | 5 +++++ 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c0ca045ce..d8ef54e7b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -34,9 +34,9 @@ static void task_mapping_ht_free_elt(void *_item, void *table) static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", - sizeof(gpu_dev_prof_t), - "device_index{int32_t};task_count{int32_t}", - &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); + sizeof(gpu_dev_prof_t), + "device_index{int32_t};task_count{int32_t};first{double};f_padding{int32_t};select{double};s_padding{int32_t};second{double};sec_padding{int32_t};exec_time{double};nb_tasks{int32_t};nb_padding{int32_t}", + &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } /** @@ -442,6 +442,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; + migrated_gpu_task->select = MPI_Wtime(); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index e0fcb665a..df8bc8090 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -58,6 +58,16 @@ typedef struct gpu_dev_prof_s { int32_t device_index; int32_t task_count; + double first; + int32_t f_padding; + double select; + int32_t s_padding; + double second; + int32_t sec_padding; + double exec_time; + int32_t nb_tasks; + int32_t nb_padding; + } gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d442683ee..71479a9fe 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2765,6 +2765,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device + if(gpu_task->migrate_status == TASK_NOT_MIGRATED) + gpu_task->first = MPI_Wtime(); + else + { + gpu_task->second = MPI_Wtime(); + gpu_task->nb_tasks = parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); + } + if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); return PARSEC_HOOK_RETURN_ASYNC; @@ -2831,6 +2839,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tExecute %s priority %d", gpu_device->super.name, parsec_task_snprintf(tmp, MAX_TASK_STRLEN, gpu_task->ec), gpu_task->ec->priority ); + + if(gpu_task->migrate_status != 
TASK_NOT_MIGRATED) + { + gpu_task->exec_time = MPI_Wtime(); + //printf("first %lf select %lf second %lf exec %lf nb_tasks %d type %d\n", + // gpu_task->first, gpu_task->select, gpu_task->second, gpu_task->exec_time, gpu_task->nb_tasks, + // gpu_task->migrate_status); + } } rc = progress_stream( gpu_device, @@ -2957,6 +2973,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_dev_prof_t prof_info; prof_info.device_index = gpu_device->super.device_index; prof_info.task_count = gpu_device->mutex; + prof_info.first = gpu_task->first; + prof_info.select = gpu_task->select; + prof_info.second = gpu_task->second; + prof_info.exec_time = gpu_task->exec_time; + prof_info.nb_tasks = gpu_task->nb_tasks; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 566ea113b..a39cda996 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -85,6 +85,11 @@ struct parsec_gpu_task_s { parsec_stage_in_function_t *stage_in; parsec_stage_out_function_t *stage_out; int migrate_status; + double first; + double select; + double second; + double exec_time; + int32_t nb_tasks; int32_t posssible_candidate[MAX_PARAM_COUNT]; parsec_data_copy_t* candidate[MAX_PARAM_COUNT]; parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; From 96ca55b18f5e5f42f260be591eb596fb8d41675a Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Jul 2022 15:22:49 -0400 Subject: [PATCH 155/215] tracing updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 11 ++++------- parsec/mca/device/cuda/device_cuda_module.c | 15 +++++++-------- parsec/mca/device/device_gpu.h | 1 + 4 files changed, 13 insertions(+), 16 deletions(-) diff --git 
a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d8ef54e7b..637b7d4b3 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -35,7 +35,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "device_index{int32_t};task_count{int32_t};first{double};f_padding{int32_t};select{double};s_padding{int32_t};second{double};sec_padding{int32_t};exec_time{double};nb_tasks{int32_t};nb_padding{int32_t}", + "first{double};select{double};second{double};exec_time{double};stage{double};device_index{int32_t};task_count{int32_t};nb_tasks{int32_t};type{int32_t}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index df8bc8090..befa217ae 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -56,18 +56,15 @@ typedef struct task_mapping_item_s typedef struct gpu_dev_prof_s { - int32_t device_index; - int32_t task_count; double first; - int32_t f_padding; double select; - int32_t s_padding; double second; - int32_t sec_padding; double exec_time; + double stage; + int32_t device_index; + int32_t task_count; int32_t nb_tasks; - int32_t nb_padding; - + int32_t type; } gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 71479a9fe..d6bb4dbd9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2806,6 +2806,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #endif } + if(NULL != progress_task && gpu_task->migrate_status != TASK_NOT_MIGRATED) + gpu_task->stage = MPI_Wtime(); + rc = progress_stream( gpu_device, gpu_device->exec_stream[0], 
parsec_cuda_kernel_push, @@ -2839,16 +2842,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tExecute %s priority %d", gpu_device->super.name, parsec_task_snprintf(tmp, MAX_TASK_STRLEN, gpu_task->ec), gpu_task->ec->priority ); - - if(gpu_task->migrate_status != TASK_NOT_MIGRATED) - { - gpu_task->exec_time = MPI_Wtime(); - //printf("first %lf select %lf second %lf exec %lf nb_tasks %d type %d\n", - // gpu_task->first, gpu_task->select, gpu_task->second, gpu_task->exec_time, gpu_task->nb_tasks, - // gpu_task->migrate_status); - } } + if(NULL != progress_task && progress_task->migrate_status != TASK_NOT_MIGRATED) + progress_task->exec_time = MPI_Wtime(); + rc = progress_stream( gpu_device, gpu_device->exec_stream[2+exec_stream], NULL, @@ -2978,6 +2976,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.second = gpu_task->second; prof_info.exec_time = gpu_task->exec_time; prof_info.nb_tasks = gpu_task->nb_tasks; + prof_info.type = gpu_task->migrate_status; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index a39cda996..9725a7390 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -88,6 +88,7 @@ struct parsec_gpu_task_s { double first; double select; double second; + double stage; double exec_time; int32_t nb_tasks; int32_t posssible_candidate[MAX_PARAM_COUNT]; From 75e6e7281590f291b58dbb36d9f770685e8f2ca7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Jul 2022 18:35:07 -0400 Subject: [PATCH 156/215] tracing updated --- parsec/mca/device/cuda/device_cuda_module.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git 
a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d6bb4dbd9..725fde7c9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2177,6 +2177,14 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t*)task); assert( NULL == stream->tasks[stream->start] ); + + if(NULL != task && task->migrate_status != TASK_NOT_MIGRATED) + { + if( gpu_device->exec_stream[0] == stream ) //stage_in queue + task->stage = MPI_Wtime(); + else if( gpu_device->exec_stream[1] == stream) //execution queue + task->exec_time = MPI_Wtime(); + } /** * In case the task is succesfully progressed, the corresponding profiling * event is triggered. @@ -2805,9 +2813,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->taskpool->taskpool_id, NULL, 0); #endif } - - if(NULL != progress_task && gpu_task->migrate_status != TASK_NOT_MIGRATED) - gpu_task->stage = MPI_Wtime(); rc = progress_stream( gpu_device, gpu_device->exec_stream[0], @@ -2844,9 +2849,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->priority ); } - if(NULL != progress_task && progress_task->migrate_status != TASK_NOT_MIGRATED) - progress_task->exec_time = MPI_Wtime(); - rc = progress_stream( gpu_device, gpu_device->exec_stream[2+exec_stream], NULL, From ee6658964c9d99f818d7f366547d86bb2b3acfb0 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Jul 2022 18:35:41 -0400 Subject: [PATCH 157/215] tracing updated --- parsec/mca/device/cuda/device_cuda_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 725fde7c9..e5f6cfb4a 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2182,7 +2182,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, { if( 
gpu_device->exec_stream[0] == stream ) //stage_in queue task->stage = MPI_Wtime(); - else if( gpu_device->exec_stream[1] == stream) //execution queue + else if( gpu_device->exec_stream[1] != stream) //execution queue task->exec_time = MPI_Wtime(); } /** From 7e95eaa7c78e49e2539d134ca5b9fb0da4628834 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 26 Jul 2022 20:25:16 -0400 Subject: [PATCH 158/215] tracing updated --- parsec/mca/device/cuda/device_cuda_module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index e5f6cfb4a..d95b71fd9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2977,6 +2977,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.select = gpu_task->select; prof_info.second = gpu_task->second; prof_info.exec_time = gpu_task->exec_time; + prof_info.stage = gpu_task->stage; prof_info.nb_tasks = gpu_task->nb_tasks; prof_info.type = gpu_task->migrate_status; parsec_profiling_trace_flags(es->es_profile, From 171b105971b3dc348d8538088bccb0c68c244a8c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 27 Jul 2022 19:09:55 -0400 Subject: [PATCH 159/215] GPU task completion moved back to manger code. Moving GPU task completion to the __parsec_task_progress() cause problem for some special tasks. 
So we move it back to parsec_cuda_kernel_scheduler() --- parsec/mca/device/cuda/device_cuda_module.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index d95b71fd9..67961a754 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2951,6 +2951,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, goto remove_gpu_task; } + parsec_cuda_kernel_epilog( gpu_device, gpu_task ); + gpu_device->super.executed_tasks++; + __parsec_complete_execution( es, gpu_task->ec ); + /** * @brief For tasks migrated after stage_ in, during the first stage_in * we would have increased the refcount of the data_in. If the task was not @@ -2966,9 +2970,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); } - parsec_cuda_kernel_epilog( gpu_device, gpu_task ); - gpu_device->super.executed_tasks++; - #if defined(PARSEC_PROF_TRACE) gpu_dev_prof_t prof_info; prof_info.device_index = gpu_device->super.device_index; @@ -2986,19 +2987,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); #endif - /** - * responsibility of task completion moved from GPU manager thread - * to the CPU threads. A special case is added to __parsec_task_progress() - * to deal with task completion of the GPU tasks. As the gpu_task->ec->status - * is updated, the spaecial case will make sure that the the tasks will not be - * executed again. 
- */ - - gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE; - int distance = 0; - PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec); - __parsec_schedule(es, gpu_task->ec, distance); - if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) { parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); From ca0707878aa3b9660e5374e10a38947164ef1f45 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 28 Jul 2022 12:47:59 -0400 Subject: [PATCH 160/215] Check for NULL in the gpu_task->original_data_in. Check for NULL is important. Otherwise, in cases where the flow is a NEW on GPU it will cause problems. --- parsec/mca/device/cuda/device_cuda_module.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 67961a754..9f1abd0a4 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1396,7 +1396,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * first stage_in candidate. */ - if(gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) + if( /* Check for NULL is important. 
Otherwise, in cases where the flow is a NEW on GPU it will cause problems */ + gpu_task->original_data_in[ flow->flow_index ] != NULL && + gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) PARSEC_OBJ_RELEASE(task_data->data_in); /** From 294392b2af75ca6b17bc8788dc324312c0ab6967 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Jul 2022 10:33:57 -0400 Subject: [PATCH 161/215] tracing code changed --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++-- parsec/mca/device/cuda/device_cuda_migrate.h | 10 +++--- parsec/mca/device/cuda/device_cuda_module.c | 32 +++++++++++--------- parsec/mca/device/device_gpu.h | 23 +++++++------- 4 files changed, 39 insertions(+), 32 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 637b7d4b3..d9a5320e6 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -35,7 +35,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "first{double};select{double};second{double};exec_time{double};stage{double};device_index{int32_t};task_count{int32_t};nb_tasks{int32_t};type{int32_t}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time{double};stage_in_time{double};device_index{int32_t};task_count{int32_t};waiting_tasks{int32_t};type{int32_t}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -442,7 +442,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ mig_task->starving_device = starving_device; mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - migrated_gpu_task->select = MPI_Wtime(); + #if defined(PARSEC_PROF_TRACE) + migrated_gpu_task->select_time = MPI_Wtime(); + #endif PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index befa217ae..35d39af6a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -56,14 +56,14 @@ typedef struct task_mapping_item_s typedef struct gpu_dev_prof_s { - double first; - double select; - double second; + double first_queue_time; + double select_time; + double second_queue_time; double exec_time; - double stage; + double stage_in_time; int32_t device_index; int32_t task_count; - int32_t nb_tasks; + int32_t waiting_tasks; int32_t type; } gpu_dev_prof_t; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 9f1abd0a4..4048f73fa 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2180,13 +2180,15 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, assert( NULL == stream->tasks[stream->start] ); - if(NULL != task && task->migrate_status != TASK_NOT_MIGRATED) - { - if( gpu_device->exec_stream[0] == stream ) //stage_in queue - task->stage = MPI_Wtime(); - else if( gpu_device->exec_stream[1] != stream) //execution queue +#if defined(PARSEC_PROF_TRACE) + if(NULL != task && task->migrate_status != TASK_NOT_MIGRATED) + { + if( gpu_device->exec_stream[0] == stream ) //stage_in time + task->stage_in_time = MPI_Wtime(); + else if( gpu_device->exec_stream[1] != stream) //execution time task->exec_time = MPI_Wtime(); - } + } +#endif /** * In case the task is succesfully progressed, the corresponding profiling * event is triggered. 
@@ -2775,13 +2777,15 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device +#if defined(PARSEC_PROF_TRACE) if(gpu_task->migrate_status == TASK_NOT_MIGRATED) - gpu_task->first = MPI_Wtime(); + gpu_task->first_queue_time = MPI_Wtime(); else { - gpu_task->second = MPI_Wtime(); - gpu_task->nb_tasks = parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); + gpu_task->second_queue_time = MPI_Wtime(); + gpu_task->waiting_tasks = parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); } +#endif if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); @@ -2976,12 +2980,12 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_dev_prof_t prof_info; prof_info.device_index = gpu_device->super.device_index; prof_info.task_count = gpu_device->mutex; - prof_info.first = gpu_task->first; - prof_info.select = gpu_task->select; - prof_info.second = gpu_task->second; + prof_info.first_queue_time = gpu_task->first_queue_time; + prof_info.select_time = gpu_task->select_time; + prof_info.second_queue_time = gpu_task->second_queue_time; prof_info.exec_time = gpu_task->exec_time; - prof_info.stage = gpu_task->stage; - prof_info.nb_tasks = gpu_task->nb_tasks; + prof_info.stage_in_time = gpu_task->stage_in_time; + prof_info.waiting_tasks = gpu_task->waiting_tasks; prof_info.type = gpu_task->migrate_status; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 9725a7390..47dcb1b9c 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -84,21 +84,22 @@ struct parsec_gpu_task_s { parsec_complete_stage_function_t complete_stage; parsec_stage_in_function_t 
*stage_in; parsec_stage_out_function_t *stage_out; - int migrate_status; - double first; - double select; - double second; - double stage; - double exec_time; - int32_t nb_tasks; - int32_t posssible_candidate[MAX_PARAM_COUNT]; - parsec_data_copy_t* candidate[MAX_PARAM_COUNT]; - parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; - int32_t data_retained; + int migrate_status; + int32_t posssible_candidate[MAX_PARAM_COUNT]; + parsec_data_copy_t* candidate[MAX_PARAM_COUNT]; + parsec_data_copy_t* original_data_in[MAX_PARAM_COUNT]; + int32_t data_retained; #if defined(PARSEC_PROF_TRACE) int prof_key_end; uint64_t prof_event_id; uint32_t prof_tp_id; + + double first_queue_time; + double select_time; + double second_queue_time; + double stage_in_time; + double exec_time; + int32_t waiting_tasks; #endif union { struct { From 6fde742e8e31924191f9586ee5d10c8067d08b5d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Jul 2022 15:37:06 -0400 Subject: [PATCH 162/215] Statistics made optional and device->mutex used to find starvation. 
--- .../mca/device/cuda/device_cuda_component.c | 4 + parsec/mca/device/cuda/device_cuda_migrate.c | 89 +++++++++---------- parsec/mca/device/cuda/device_cuda_migrate.h | 1 - parsec/mca/device/cuda/device_cuda_module.c | 38 ++++---- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 750be42bb..6de117b92 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -41,6 +41,7 @@ int parsec_cuda_sort_pending = 0, parsec_cuda_max_streams = PARSEC_GPU_MAX_STREA int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; char* parsec_cuda_lib_path = NULL; int parsec_cuda_migrate_tasks = 0; +int parsec_migrate_statistics = 0; int parsec_cuda_iterative = 0; int parsec_cuda_migrate_chunk_size = 0; @@ -203,6 +204,9 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_tasks", "Boolean to let the GPU engine migrate tasks", false, false, 0, &parsec_cuda_migrate_tasks); + (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_statistics", + "Boolean to print migrate statistics", + false, false, 0, &parsec_migrate_statistics); (void)parsec_mca_param_reg_int_name("device_cuda", "iterative", "Boolean to let the GPU know the workload is iterative", false, false, 0, &parsec_cuda_iterative); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d9a5320e6..9178e87ee 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -2,6 +2,7 @@ extern int parsec_device_cuda_enabled; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) +extern int parsec_migrate_statistics; parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; // list of all 
migrated task @@ -60,7 +61,6 @@ int parsec_cuda_migrate_init(int ndevices) NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *)calloc(ndevices, sizeof(parsec_device_cuda_info_t)); migrated_task_list = PARSEC_OBJ_NEW(parsec_list_t); - ; for (i = 0; i < NDEVICES; i++) { @@ -83,13 +83,6 @@ int parsec_cuda_migrate_init(int ndevices) gpu_dev_profiling_init(); #endif - char hostname[256]; - gethostname(hostname, sizeof(hostname)); - printf("PID %d on %s ready for attach\n", getpid(), hostname); - // sleep(60); - - printf("Migration module initialised for %d devices \n", NDEVICES); - return 0; } @@ -108,26 +101,27 @@ int parsec_cuda_migrate_fini() PARSEC_OBJ_RELEASE(task_mapping_ht); task_mapping_ht = NULL; - for (i = 0; i < NDEVICES; i++) + if(parsec_migrate_statistics) { - printf("\n*********** DEVICE %d *********** \n", i); - printf("Total tasks executed: %d \n", device_info[i].total_tasks_executed); - printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", - device_info[i].level0, device_info[i].level1, device_info[i].level2, - device_info[i].level0 + device_info[i].level1 + device_info[i].level2); - printf("Task check : level0 %d level1 %d level2 %d total %d \n", - parsec_cuda_get_device_task(i, 0), - parsec_cuda_get_device_task(i, 1), - parsec_cuda_get_device_task(i, 2), - parsec_cuda_get_device_task(i, -1)); - printf("Task received : %d \n", device_info[i].received); + for (i = 0; i < NDEVICES; i++) + { + printf("\n*********** DEVICE %d *********** \n", i); + printf("Total tasks executed: %d \n", device_info[i].total_tasks_executed); + printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", + device_info[i].level0, device_info[i].level1, device_info[i].level2, + device_info[i].level0 + device_info[i].level1 + device_info[i].level2); + printf("Task check : level0 %d level1 %d level2 %d total %d \n", + parsec_cuda_get_device_task(i, 0), + parsec_cuda_get_device_task(i, 1), + parsec_cuda_get_device_task(i, 2), + 
parsec_cuda_get_device_task(i, -1)); + printf("Task received : %d \n", device_info[i].received); + } } printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); - printf("Migration module shut down \n"); - return 0; } @@ -195,7 +189,9 @@ int is_starving(int device) * starvtion if the number of ready tasks available is less than twice the * number of execution stream. */ - return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; + parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); + //return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; + return (d->mutex < 5) ? 1 : 0; } int will_starve(int device) @@ -205,7 +201,10 @@ int will_starve(int device) * starvtion if migrating a task will push the number of ready tasks available * to less than twice the number of execution stream. */ - return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; + parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); + //return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; + return (d->mutex < 5) ? 
1 : 0; + } /** @@ -398,21 +397,23 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ if (migrated_gpu_task != NULL) // make sure the task was not returned to the queue { - - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) + if(parsec_migrate_statistics) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; + if (execution_level == 0) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; + } + if (execution_level == 1) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; + } + if (execution_level == 2) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; + } } nb_migrated++; parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); @@ -694,11 +695,9 @@ void clear_task_migrated_per_tp() void print_task_migrated_per_tp() { - printf("\n*********** TASKPOOL %d *********** \n", tp_count++); - printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); -} - -int get_tp_count() -{ - return tp_count; + if(parsec_migrate_statistics) + { + printf("\n*********** TASKPOOL %d *********** \n", tp_count++); + printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); + } } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 35d39af6a..3d404b9aa 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -95,6 +95,5 @@ int 
update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); void print_task_migrated_per_tp(); -int get_tp_count(); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 4048f73fa..74dca0c61 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -53,6 +53,8 @@ extern int parsec_cuda_iterative; extern int parsec_cuda_migrate_chunk_size; extern int parsec_gpu_task_count_start; extern int parsec_gpu_task_count_end; +extern int parsec_cuda_migrate_tasks; +extern int parsec_migrate_statistics; /* look up how many FMA per cycle in single/double, per cuda MP * precision. @@ -2116,17 +2118,20 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if( NULL != task ) { PARSEC_PUSH_TASK(stream->fifo_pending, (parsec_list_item_t*)task); - if (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + if(parsec_migrate_statistics) { - if(stream == gpu_device->exec_stream[0]) - { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); - } - else if(stream != gpu_device->exec_stream[1] && stream != gpu_device->exec_stream[0]) + if (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); + if(stream == gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); + } + else if(stream != gpu_device->exec_stream[1] 
&& stream != gpu_device->exec_stream[0]) + { + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); + parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); + } } } @@ -2774,7 +2779,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + if (parsec_migrate_statistics && (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL)) parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device #if defined(PARSEC_PROF_TRACE) @@ -2783,7 +2788,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, else { gpu_task->second_queue_time = MPI_Wtime(); - gpu_task->waiting_tasks = parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1); + gpu_task->waiting_tasks = gpu_device->mutex - 1; } #endif @@ -2993,7 +2998,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); #endif - if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + if ((gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && parsec_migrate_statistics) { parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); @@ -3019,15 +3024,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } gpu_task = progress_task; - //printf(" tp %d time %lf device %d level0 %d level1 %d level2 %d total %d \n", - // get_tp_count(), - // current_time(), - // CUDA_DEVICE_NUM(gpu_device->super.device_index), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 0), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 1), - // 
parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), 2), - // parsec_cuda_get_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), -1)); - /** * @brief Before a new task is selectd by the device manager for execution, * the manager checks if there are any starving devices and migrate tasks, From f7b0c02beb961c257e68c25f6fb80dc7b1659453 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 29 Jul 2022 15:47:04 -0400 Subject: [PATCH 163/215] Code cleanup --- parsec/mca/device/cuda/device_cuda_migrate.c | 9 +-------- parsec/mca/device/cuda/device_cuda_migrate.h | 10 ---------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9178e87ee..f63406320 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -51,11 +51,6 @@ static void gpu_dev_profiling_init() int parsec_cuda_migrate_init(int ndevices) { int i, j; - -#if defined(PARSEC_HAVE_CUDA) - nvmlReturn_t nvml_ret; -#endif - start = MPI_Wtime(); NDEVICES = ndevices; @@ -64,10 +59,8 @@ int parsec_cuda_migrate_init(int ndevices) for (i = 0; i < NDEVICES; i++) { - for (j = 0; j < EXECUTION_LEVEL; j++) - device_info[i].task_count[j] = 0; + for (j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; device_info[i].load = 0; - device_info[i].level0 = 0; device_info[i].level1 = 0; device_info[i].level2 = 0; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 3d404b9aa..37e724843 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -74,22 +74,12 @@ int parsec_cuda_set_device_task(int device, int task_count, int level); int parsec_cuda_tasks_executed(int device); int is_starving(int device); int find_starving_device(int dealer_device); -parsec_device_gpu_module_t *parsec_cuda_change_device(int 
dealer_device_index); int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t *mig_task); int parsec_cuda_mig_task_dequeue(parsec_execution_stream_t *es); -int migrate_immediate(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t *migrated_gpu_task); int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device); -int parsec_gpu_data_reserve_device_space_for_flow(parsec_device_gpu_module_t *gpu_device, - parsec_gpu_task_t *gpu_task, const parsec_flow_t *flow); -int migrate_data_d2d(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *src_dev, - parsec_device_gpu_module_t *dest_dev); int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); -int gurantee_ownership_transfer(parsec_gpu_task_t *gpu_task, parsec_data_t *data, int flow_index, - parsec_data_copy_t *src_copy, parsec_data_copy_t *dst_copy, - uint8_t stage_in_device, uint8_t access_mode); double current_time(); int update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); From 1294ccc7b4ef8be682cb49d22cdea84bddddcecd Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 5 Aug 2022 14:57:08 -0400 Subject: [PATCH 164/215] parsec/mca/device/device.c updated. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 35 ++++++++++++++++++-- parsec/mca/device/device.c | 28 +++++++++++++--- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index f63406320..38d8ba970 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -335,8 +335,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ * @brief Tasks are searched in different levels one by one. At this point we assume * that the cost of migration increases, as the level increase. */ - - // level 0 - task is just pushed to the device queue + + //#if 0 + //level 0 - task is just pushed to the device queue migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 execution_level = 0; if (migrated_gpu_task == NULL) @@ -360,6 +361,36 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } } } + //#endif + + #if 0 + if (migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + if (migrated_gpu_task != NULL) + { + execution_level = 2; + stream_index = 2 + j; + break; + } + } + + if (migrated_gpu_task == NULL) + { + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 + execution_level = 0; + + if (migrated_gpu_task == NULL) + { + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + execution_level = 1; + } + } + } + #endif if (migrated_gpu_task != NULL) { diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 7ac902422..ca756eebc 
100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -60,6 +60,14 @@ float *parsec_device_sweight = NULL; float *parsec_device_dweight = NULL; float *parsec_device_tweight = NULL; +/** + * Load balance skew we are willing to accept to favor RO data reuse + * on GPU: a value of 20% means that we will schedule tasks on the preferred + * GPU except if it is loaded 1.2 times as much as the best load balance option + */ +static int parsec_device_load_balance_skew = 20; +static float load_balance_skew; + /** * Try to find the best device to execute the kernel based on the compute * capability of the device. @@ -71,14 +79,18 @@ float *parsec_device_tweight = NULL; * -1 - if the kernel is scheduled to be executed on a GPU. */ -static int parsec_device_load_balance_skew = 20; -static float load_balance_skew; - int parsec_get_best_device( parsec_task_t* this_task, double ratio ) { int i, dev_index = -1, data_index, prefer_index = -1; parsec_taskpool_t* tp = this_task->taskpool; + if(parsec_cuda_iterative) + { + // if task to device mapping is already available use that + dev_index = find_task_to_device_mapping(this_task); + if(dev_index != -1) return dev_index; + } + /* Select the location of the first data that is used in READ/WRITE or pick the * location of one of the READ data. For now use the last one. 
*/ @@ -183,7 +195,6 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } - return dev_index; } @@ -208,6 +219,13 @@ int parsec_mca_device_init(void) (void)parsec_mca_param_reg_int_name("device", "show_statistics", "Show the detailed devices statistics upon exit", false, false, 0, NULL); + (void)parsec_mca_param_reg_int_name("device", "load_balance_skew", + "Allow load balancing to skew by x%% to favor data reuse", + false, false, 0, NULL); + if( 0 < (rc = parsec_mca_param_find("device", NULL, "load_balance_skew")) ) { + parsec_mca_param_lookup_int(rc, &parsec_device_load_balance_skew); + } + load_balance_skew = 1.f/(parsec_device_load_balance_skew/100.f+1.f); if( 0 < (rc = parsec_mca_param_find("device", NULL, "verbose")) ) { parsec_mca_param_lookup_int(rc, &parsec_device_verbose); } @@ -935,4 +953,4 @@ int parsec_devices_release_memory(void) } } return PARSEC_SUCCESS; -} +} \ No newline at end of file From 746b64bda01b1b7b1eac1fe532a1d3d7b70a8992 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Fri, 5 Aug 2022 19:55:46 -0400 Subject: [PATCH 165/215] More statistics added --- parsec/mca/device/cuda/device_cuda_migrate.c | 26 ++++++++++++-------- parsec/mca/device/cuda/device_cuda_migrate.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 38d8ba970..2a9c1d000 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -67,6 +67,7 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].total_tasks_executed = 0; device_info[i].received = 0; device_info[i].last_device = i; + device_info[i].deal_count = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -81,7 +82,9 @@ int parsec_cuda_migrate_init(int ndevices) int parsec_cuda_migrate_fini() { - int i; + int i = 0; + int tot_task_migrated = 
0; + float avg_task_migrated = 0; end = MPI_Wtime(); @@ -98,17 +101,18 @@ int parsec_cuda_migrate_fini() { for (i = 0; i < NDEVICES; i++) { + tot_task_migrated = device_info[i].level0 + device_info[i].level1 + device_info[i].level2; + avg_task_migrated = ((float)tot_task_migrated) / ((float)device_info[i].deal_count); + printf("\n*********** DEVICE %d *********** \n", i); - printf("Total tasks executed: %d \n", device_info[i].total_tasks_executed); - printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", + printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); + printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", device_info[i].level0, device_info[i].level1, device_info[i].level2, - device_info[i].level0 + device_info[i].level1 + device_info[i].level2); - printf("Task check : level0 %d level1 %d level2 %d total %d \n", - parsec_cuda_get_device_task(i, 0), - parsec_cuda_get_device_task(i, 1), - parsec_cuda_get_device_task(i, 2), - parsec_cuda_get_device_task(i, -1)); - printf("Task received : %d \n", device_info[i].received); + tot_task_migrated); + printf("Task received : %d \n", device_info[i].received); + printf("Deal Count : %d \n", device_info[i].deal_count); + printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); + printf("Avg task migrated per deal : %lf \n", avg_task_migrated); } } printf("\n---------Execution time = %lf ------------ \n", end - start); @@ -328,6 +332,8 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ continue; starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); + device_info[dealer_device_index].deal_count++; + for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 37e724843..0b01b41b4 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -37,6 +37,7 @@ typedef struct parsec_device_cuda_info_s int level2; int received; int last_device; + int deal_count; } parsec_device_cuda_info_t; typedef struct migrated_task_s From 4434bca30ce5e7eb180ebd1b1123c351d9032ea6 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 13 Aug 2022 00:37:07 -0400 Subject: [PATCH 166/215] more statistics added. --- parsec/mca/device/cuda/device_cuda_migrate.c | 27 ++++++++++++++------ parsec/mca/device/cuda/device_cuda_migrate.h | 1 + 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 2a9c1d000..761a2c26a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -68,6 +68,7 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].received = 0; device_info[i].last_device = i; device_info[i].deal_count = 0; + device_info[i].success_count = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -84,7 +85,7 @@ int parsec_cuda_migrate_fini() { int i = 0; int tot_task_migrated = 0; - float avg_task_migrated = 0; + float avg_task_migrated = 0, deal_success_perc = 0, avg_task_migrated_per_sucess; end = MPI_Wtime(); @@ -103,16 +104,21 @@ int parsec_cuda_migrate_fini() { tot_task_migrated = device_info[i].level0 + device_info[i].level1 + device_info[i].level2; avg_task_migrated = ((float)tot_task_migrated) / ((float)device_info[i].deal_count); + deal_success_perc = (((float)device_info[i].success_count) / ((float)device_info[i].deal_count)) * 100; + avg_task_migrated_per_sucess = ((float)tot_task_migrated) / ((float)device_info[i].success_count); printf("\n*********** DEVICE %d *********** \n", i); - printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); - printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", + printf("Total tasks executed : %d \n", 
device_info[i].total_tasks_executed); + printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", device_info[i].level0, device_info[i].level1, device_info[i].level2, tot_task_migrated); - printf("Task received : %d \n", device_info[i].received); - printf("Deal Count : %d \n", device_info[i].deal_count); - printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); - printf("Avg task migrated per deal : %lf \n", avg_task_migrated); + printf("Task received : %d \n", device_info[i].received); + printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); + printf("Total deals : %d \n", device_info[i].deal_count); + printf("Successful deals : %d \n", device_info[i].success_count); + printf("Avg task migrated per deal : %lf \n", avg_task_migrated); + printf("Perc of successfull deals : %lf \n", deal_success_perc); + printf("Avg task migrated per successfull deal : %lf \n", avg_task_migrated_per_sucess); } } printf("\n---------Execution time = %lf ------------ \n", end - start); @@ -314,6 +320,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0, stream_index = 0; + int deal_success = 0; int i = 0, j = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; @@ -333,7 +340,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); device_info[dealer_device_index].deal_count++; - + deal_success = 0; for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { @@ -446,6 +453,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } } nb_migrated++; + deal_success++; parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); /** @@ -492,6 +500,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, 
parsec_device_gpu_ break; } // end for i + if(deal_success > 0) + device_info[dealer_device_index].success_count++; + if (will_starve(dealer_device_index)) break; } // end for d diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 0b01b41b4..d9a420542 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -38,6 +38,7 @@ typedef struct parsec_device_cuda_info_s int received; int last_device; int deal_count; + int success_count; } parsec_device_cuda_info_t; typedef struct migrated_task_s From 751f6f6cb61268ecf2be6d9ce4243cb82e23e119 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 22 Aug 2022 21:11:18 -0400 Subject: [PATCH 167/215] If the task is not a compute task, it is pushed back immediately. --- parsec/mca/device/cuda/device_cuda_migrate.c | 194 ++++++++----------- 1 file changed, 86 insertions(+), 108 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 761a2c26a..307093fb2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -349,14 +349,38 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ * that the cost of migration increases, as the level increase. */ - //#if 0 //level 0 - task is just pushed to the device queue - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(&(dealer_device->pending)); // level 0 + + if ( migrated_gpu_task != NULL) + { + /** + * @brief if the task is a not a computational kerenel or if it is a task that has + * already been migrated, we stop the migration and push it back to the queue.
+ */ + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + + } execution_level = 0; + if (migrated_gpu_task == NULL) { // level1 - task is aavailble in the stage_in queue. Stage_in not started. - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + + if ( migrated_gpu_task != NULL) + { + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + + } execution_level = 1; if (migrated_gpu_task == NULL) @@ -364,7 +388,18 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) { // level2 - task is available in one of the execution queue stage_in is complete - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + + if ( migrated_gpu_task != NULL) + { + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + { + parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); + migrated_gpu_task = NULL; + } + + } + if (migrated_gpu_task != NULL) { execution_level = 2; @@ -374,126 +409,69 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } } } 
- //#endif - #if 0 - if (migrated_gpu_task == NULL) - { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) - { - // level2 - task is available in one of the execution queue stage_in is complete - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 - if (migrated_gpu_task != NULL) - { - execution_level = 2; - stream_index = 2 + j; - break; - } - } - - if (migrated_gpu_task == NULL) - { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(&(dealer_device->pending)); // level 0 - execution_level = 0; - - if (migrated_gpu_task == NULL) - { - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_try_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 - execution_level = 1; - } - } - } - #endif if (migrated_gpu_task != NULL) { assert(migrated_gpu_task->ec != NULL); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); - /** - * @brief if the task is a not a computational kerenel or if it is a task that has - * already been migrated, we stop the migration and push it back to the queue. 
- */ - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + + if(parsec_migrate_statistics) { if (execution_level == 0) { - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; } - else if (execution_level == 1) + if (execution_level == 1) { - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; } - else if (execution_level == 2) + if (execution_level == 2) { - parsec_list_push_back(dealer_device->exec_stream[stream_index]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; } } + nb_migrated++; + deal_success++; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - if (migrated_gpu_task != NULL) // make sure the task was not returned to the queue - { - if(parsec_migrate_statistics) - { - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; - } - } - nb_migrated++; - deal_success++; - parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - - /** - * @brief change migrate_status according to the status of the 
stage in of the - * stage_in data. - */ - if (execution_level == 2) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; - else - migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; - - /** - * @brief An object of type migrated_task_t is created store the migrated task - * and other associated details. This object is enqueued to a node level queue. - * The main objective of this was to make sure that the manager does not have to sepend - * time on migration. It can select the task for migration, enqueue it to the node level - * queue and then return to its normal working. - */ - mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); - PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); - - mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = starving_device; - mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - - #if defined(PARSEC_PROF_TRACE) - migrated_gpu_task->select_time = MPI_Wtime(); - #endif - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); - parsec_cuda_mig_task_enqueue(es, mig_task); - - device_info[dealer_device_index].last_device = starving_device_index; - - char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), - execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); - } + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. 
+ */ + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + /** + * @brief An object of type migrated_task_t is created store the migrated task + * and other associated details. This object is enqueued to a node level queue. + * The main objective of this was to make sure that the manager does not have to sepend + * time on migration. It can select the task for migration, enqueue it to the node level + * queue and then return to its normal working. + */ + mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + + mig_task->gpu_task = migrated_gpu_task; + for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + mig_task->stage_in_status = (execution_level == 2) ? TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; + + #if defined(PARSEC_PROF_TRACE) + migrated_gpu_task->select_time = MPI_Wtime(); + #endif + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + parsec_cuda_mig_task_enqueue(es, mig_task); + device_info[dealer_device_index].last_device = starving_device_index; + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); } if (will_starve(dealer_device_index)) From 75056b95c4b6c5a72c826ab2486252aeebee04e2 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 23 Aug 2022 00:27:11 -0400 Subject: [PATCH 168/215] More statistics added --- parsec/mca/device/cuda/device_cuda_migrate.c | 77 +++++++++++++++++--- 
parsec/mca/device/cuda/device_cuda_migrate.h | 5 ++ parsec/mca/device/cuda/device_cuda_module.c | 18 ++++- 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 307093fb2..24c3c1c5a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -60,15 +60,16 @@ int parsec_cuda_migrate_init(int ndevices) for (i = 0; i < NDEVICES; i++) { for (j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; - device_info[i].load = 0; - device_info[i].level0 = 0; - device_info[i].level1 = 0; - device_info[i].level2 = 0; + device_info[i].load = 0; + device_info[i].level0 = 0; + device_info[i].level1 = 0; + device_info[i].level2 = 0; device_info[i].total_tasks_executed = 0; - device_info[i].received = 0; - device_info[i].last_device = i; - device_info[i].deal_count = 0; - device_info[i].success_count = 0; + device_info[i].received = 0; + device_info[i].last_device = i; + device_info[i].deal_count = 0; + device_info[i].success_count = 0; + device_info[i].ready_compute_tasks = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -86,6 +87,11 @@ int parsec_cuda_migrate_fini() int i = 0; int tot_task_migrated = 0; float avg_task_migrated = 0, deal_success_perc = 0, avg_task_migrated_per_sucess; + int summary_total_tasks_executed = 0, summary_total_compute_tasks_executed = 0; + int summary_total_tasks_migrated = 0, summary_total_l0_tasks_migrated = 0, summary_total_l1_tasks_migrated = 0, summary_total_l2_tasks_migrated = 0; + int summary_deals = 0, summary_successful_deals = 0; + float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; + end = MPI_Wtime(); @@ -103,23 +109,51 @@ int parsec_cuda_migrate_fini() for (i = 0; i < NDEVICES; i++) { tot_task_migrated = device_info[i].level0 + device_info[i].level1 + device_info[i].level2; + 
summary_total_tasks_migrated += tot_task_migrated; + summary_total_l0_tasks_migrated += device_info[i].level0; + summary_total_l1_tasks_migrated += device_info[i].level1; + summary_total_l2_tasks_migrated += device_info[i].level2; avg_task_migrated = ((float)tot_task_migrated) / ((float)device_info[i].deal_count); deal_success_perc = (((float)device_info[i].success_count) / ((float)device_info[i].deal_count)) * 100; avg_task_migrated_per_sucess = ((float)tot_task_migrated) / ((float)device_info[i].success_count); - printf("\n*********** DEVICE %d *********** \n", i); + printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); + summary_total_tasks_executed += device_info[i].total_tasks_executed; + printf("Total compute tasks executed : %d \n", device_info[i].total_compute_tasks); + summary_total_compute_tasks_executed += device_info[i].total_compute_tasks; printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", device_info[i].level0, device_info[i].level1, device_info[i].level2, tot_task_migrated); printf("Task received : %d \n", device_info[i].received); printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); printf("Total deals : %d \n", device_info[i].deal_count); + summary_deals += device_info[i].deal_count; printf("Successful deals : %d \n", device_info[i].success_count); + summary_successful_deals += device_info[i].success_count; printf("Avg task migrated per deal : %lf \n", avg_task_migrated); - printf("Perc of successfull deals : %lf \n", deal_success_perc); printf("Avg task migrated per successfull deal : %lf \n", avg_task_migrated_per_sucess); + printf("perc of successfull deals : %lf \n", deal_success_perc); + printf("Ready compute task count : %d \n", device_info[i].ready_compute_tasks); } + + printf("\n *********** SUMMARY *********** \n"); + printf("Total tasks executed : %d \n", summary_total_tasks_executed); + printf("Total compute tasks executed : %d \n", 
summary_total_compute_tasks_executed); + printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", + summary_total_l0_tasks_migrated, summary_total_l1_tasks_migrated, summary_total_l2_tasks_migrated, + summary_total_tasks_migrated); + printf("Total deals : %d \n", summary_deals); + printf("Successful deals : %d \n", summary_successful_deals); + + summary_avg_task_migrated = ((float)summary_total_tasks_migrated) / ((float)summary_deals); + summary_avg_task_migrated_per_sucess = ((float)summary_total_tasks_migrated) / ((float)summary_successful_deals); + summary_deal_success_perc = (((float)summary_successful_deals) / ((float)summary_deals)) * 100; + + printf("Avg task migrated per deal : %lf \n", summary_avg_task_migrated); + printf("Avg task migrated per successfull deal : %lf \n", summary_avg_task_migrated_per_sucess); + printf("perc of successfull deals : %lf \n", summary_deal_success_perc); + } printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); @@ -407,7 +441,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ break; } } - } + } //end of j } @@ -416,6 +450,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ assert(migrated_gpu_task->ec != NULL); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + // keep track of compute task count. Decrement compute task count. 
+ dec_compute_task_count( dealer_device_index ); + if(parsec_migrate_statistics) { if (execution_level == 0) @@ -720,3 +757,21 @@ void print_task_migrated_per_tp() printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); } } + +int inc_compute_task_count(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].ready_compute_tasks); + return device_info[device_index].ready_compute_tasks; +} + +int dec_compute_task_count(int device_index) +{ + parsec_atomic_fetch_dec_int32(&device_info[device_index].ready_compute_tasks); + return device_info[device_index].ready_compute_tasks; +} + +int inc_compute_tasks_executed(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].total_compute_tasks); + return device_info[device_index].total_compute_tasks; +} diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index d9a420542..045446e17 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -39,6 +39,8 @@ typedef struct parsec_device_cuda_info_s int last_device; int deal_count; int success_count; + int ready_compute_tasks; + int total_compute_tasks; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -87,5 +89,8 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); void print_task_migrated_per_tp(); +int dec_compute_task_count(int device_index); +int inc_compute_task_count(int device_index); +int inc_compute_tasks_executed(int device_index); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 74dca0c61..f620dc2ae 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2782,6 +2782,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if (parsec_migrate_statistics && 
(gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL)) parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device + // keep track of compute task count. Increment compute task count. + if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + inc_compute_task_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + #if defined(PARSEC_PROF_TRACE) if(gpu_task->migrate_status == TASK_NOT_MIGRATED) gpu_task->first_queue_time = MPI_Wtime(); @@ -2945,6 +2949,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, goto check_in_deps; complete_task: + + if (parsec_migrate_statistics) + parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); + assert( NULL != gpu_task ); PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tComplete %s", gpu_device->super.name, @@ -2998,11 +3006,17 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); #endif - if ((gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && parsec_migrate_statistics) + if (parsec_migrate_statistics) { parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); - parsec_cuda_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); + + if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + inc_compute_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); } + + //keep track of compute task count. Decrement compute task count. 
+ if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) + dec_compute_task_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); remove_gpu_task: // Load problem: was parsec_device_load[gpu_device->super.device_index] -= gpu_task->load; From 109cad78f8e2135e71e507c388536562dd83f29f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 23 Aug 2022 00:50:16 -0400 Subject: [PATCH 169/215] get_compute_tasks_executed() added to return the ready compute tasks. --- parsec/mca/device/cuda/device_cuda_migrate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 24c3c1c5a..1b8eeb88b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -229,6 +229,8 @@ int is_starving(int device) parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); //return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; return (d->mutex < 5) ? 1 : 0; + + //return (get_compute_tasks_executed(device) < 5) ? 1 : 0; } int will_starve(int device) @@ -242,6 +244,8 @@ int will_starve(int device) //return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; return (d->mutex < 5) ? 1 : 0; + //return (get_compute_tasks_executed(device) < 5) ? 1 : 0; + } /** @@ -775,3 +779,9 @@ int inc_compute_tasks_executed(int device_index) parsec_atomic_fetch_inc_int32(&device_info[device_index].total_compute_tasks); return device_info[device_index].total_compute_tasks; } + +int get_compute_tasks_executed(int device_index) +{ + return device_info[device_index].total_compute_tasks; +} + From e55ee841fbf00cb2f9d91c3135dc0f48fabf9179 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 23 Aug 2022 01:18:16 -0400 Subject: [PATCH 170/215] starvation conditions updated. is_starving() will use total tasks to be completed to detect starvation. will_starve() will use compute tasks yet to be completed, to detect possible starvation. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1b8eeb88b..3223d43e1 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -227,9 +227,9 @@ int is_starving(int device) * number of execution stream. */ parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); - //return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; return (d->mutex < 5) ? 1 : 0; + //return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; //return (get_compute_tasks_executed(device) < 5) ? 1 : 0; } @@ -240,11 +240,11 @@ int will_starve(int device) * starvtion if migrating a task will push the number of ready tasks available * to less than twice the number of execution stream. */ - parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); - //return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; - return (d->mutex < 5) ? 1 : 0; + //parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); + //return (d->mutex < 5) ? 1 : 0; - //return (get_compute_tasks_executed(device) < 5) ? 1 : 0; + //return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; + return (get_compute_tasks_executed(device) < 5) ? 
1 : 0; } From ca8b910a0c2f85729f0988182dd03f6786db87f3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 24 Aug 2022 01:32:00 -0400 Subject: [PATCH 171/215] affinity count of task migrated calculated --- parsec/mca/device/cuda/device_cuda_migrate.c | 64 ++++++++++++++++++-- parsec/mca/device/cuda/device_cuda_migrate.h | 3 +- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 3223d43e1..fccab5d88 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -70,6 +70,7 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].deal_count = 0; device_info[i].success_count = 0; device_info[i].ready_compute_tasks = 0; + device_info[i].affinity_count = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -89,7 +90,7 @@ int parsec_cuda_migrate_fini() float avg_task_migrated = 0, deal_success_perc = 0, avg_task_migrated_per_sucess; int summary_total_tasks_executed = 0, summary_total_compute_tasks_executed = 0; int summary_total_tasks_migrated = 0, summary_total_l0_tasks_migrated = 0, summary_total_l1_tasks_migrated = 0, summary_total_l2_tasks_migrated = 0; - int summary_deals = 0, summary_successful_deals = 0; + int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; @@ -125,6 +126,9 @@ int parsec_cuda_migrate_fini() printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", device_info[i].level0, device_info[i].level1, device_info[i].level2, tot_task_migrated); + printf("Tasks with affinity migrated : %d \n", device_info[i].affinity_count); + printf("Perc of affinity tasks : %lf \n", ( (float) device_info[i].affinity_count/ tot_task_migrated) * 100); + summary_affinity += device_info[i].affinity_count; printf("Task received : %d \n", device_info[i].received); 
printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); printf("Total deals : %d \n", device_info[i].deal_count); @@ -133,8 +137,7 @@ int parsec_cuda_migrate_fini() summary_successful_deals += device_info[i].success_count; printf("Avg task migrated per deal : %lf \n", avg_task_migrated); printf("Avg task migrated per successfull deal : %lf \n", avg_task_migrated_per_sucess); - printf("perc of successfull deals : %lf \n", deal_success_perc); - printf("Ready compute task count : %d \n", device_info[i].ready_compute_tasks); + printf("Perc of successfull deals : %lf \n", deal_success_perc); } printf("\n *********** SUMMARY *********** \n"); @@ -143,6 +146,8 @@ int parsec_cuda_migrate_fini() printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", summary_total_l0_tasks_migrated, summary_total_l1_tasks_migrated, summary_total_l2_tasks_migrated, summary_total_tasks_migrated); + printf("Tasks with affinity migrated : %d \n", summary_affinity); + printf("Perc of affinity tasks : %lf \n", ( (float) summary_affinity / summary_total_tasks_migrated) * 100); printf("Total deals : %d \n", summary_deals); printf("Successful deals : %d \n", summary_successful_deals); @@ -358,7 +363,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0, stream_index = 0; - int deal_success = 0; + int deal_success = 0, device_affinity = 0; int i = 0, j = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; @@ -479,6 +484,17 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ deal_success++; parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); + if(parsec_migrate_statistics) + { + if (execution_level == 2) + device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + else + device_affinity = 
find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + + if (device_affinity) + device_info[dealer_device_index].affinity_count++; + } + /** * @brief change migrate_status according to the status of the stage in of the * stage_in data. @@ -785,3 +801,43 @@ int get_compute_tasks_executed(int device_index) return device_info[device_index].total_compute_tasks; } + +int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status) +{ + int i, data_index; + parsec_data_t *original = NULL; + parsec_data_copy_t *data_copy = NULL; + parsec_task_t* this_task = gpu_task->ec; + + for( i = 0; i < this_task->task_class->nb_flows; i++ ) + { + if( NULL == this_task->data[i].data_in ) continue; + if( NULL == this_task->data[i].source_repo_entry ) continue; + + if (status == TASK_MIGRATED_BEFORE_STAGE_IN) //data will be trasfered from data_in + { + original = this_task->data[i].data_in->original; + data_copy = this_task->data[i].data_in; + } + else //data will be trasfered from data_out + { + original = this_task->data[i].data_out->original; + data_copy = this_task->data[i].data_out; + } + + if (original->device_copies[device_index] != NULL && + data_copy->version == original->device_copies[device_index]->version) + + { + /** + * If both the both the data copy has the same version, there is no need + * for a data transfer. 
+ */ + return 1; + } + } + + return 0; + +} + diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 045446e17..da2eb7e60 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -41,6 +41,7 @@ typedef struct parsec_device_cuda_info_s int success_count; int ready_compute_tasks; int total_compute_tasks; + int affinity_count; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -92,5 +93,5 @@ void print_task_migrated_per_tp(); int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); - +int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); #endif From cc83b6aed2414c0cb6ee499d5345d7277fc406bf Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 24 Aug 2022 02:26:32 -0400 Subject: [PATCH 172/215] Migrate task only with affinity to the starving device (code commented out). --- parsec/mca/device/cuda/device_cuda_migrate.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index fccab5d88..b59571fcb 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -401,7 +401,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ * @brief if the task is a not a computational kerenel or if it is a task that has * already been migrated, we stop the migration and push it back to the queue. 
*/ - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) { parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); migrated_gpu_task = NULL; @@ -417,7 +419,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ if ( migrated_gpu_task != NULL) { - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */ ) { parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); migrated_gpu_task = NULL; @@ -435,7 +439,9 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ if ( migrated_gpu_task != NULL) { - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED) + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + + if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */ ) { parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); migrated_gpu_task = NULL; From 4cb81518e0f95c1a5a1965f3b0eb0588f7441549 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 24 
Aug 2022 03:00:00 -0400 Subject: [PATCH 173/215] more statistics added --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b59571fcb..cc7ba92bc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -122,6 +122,7 @@ int parsec_cuda_migrate_fini() printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); summary_total_tasks_executed += device_info[i].total_tasks_executed; printf("Total compute tasks executed : %d \n", device_info[i].total_compute_tasks); + printf("Perc of compute tasks : %lf \n", ((float)device_info[i].total_compute_tasks / device_info[i].total_tasks_executed) * 100); summary_total_compute_tasks_executed += device_info[i].total_compute_tasks; printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", device_info[i].level0, device_info[i].level1, device_info[i].level2, @@ -143,6 +144,7 @@ int parsec_cuda_migrate_fini() printf("\n *********** SUMMARY *********** \n"); printf("Total tasks executed : %d \n", summary_total_tasks_executed); printf("Total compute tasks executed : %d \n", summary_total_compute_tasks_executed); + printf("Perc of compute tasks : %lf \n", ((float)summary_total_compute_tasks_executed / summary_total_tasks_executed) * 100); printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", summary_total_l0_tasks_migrated, summary_total_l1_tasks_migrated, summary_total_l2_tasks_migrated, summary_total_tasks_migrated); From 2b6469b4a586c7e4a6b1cf4b4278b31eeb32eaa4 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 25 Aug 2022 20:12:05 -0400 Subject: [PATCH 174/215] PINS module task_granularity removed --- .../task_granularity/ValidateModule.CMake | 8 - .../task_granularity/pins_task_granularity.h | 25 --- .../pins_task_granularity_component.c | 74 ------- .../pins_task_granularity_module.c | 
201 ------------------ 4 files changed, 308 deletions(-) delete mode 100644 parsec/mca/pins/task_granularity/ValidateModule.CMake delete mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity.h delete mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_component.c delete mode 100644 parsec/mca/pins/task_granularity/pins_task_granularity_module.c diff --git a/parsec/mca/pins/task_granularity/ValidateModule.CMake b/parsec/mca/pins/task_granularity/ValidateModule.CMake deleted file mode 100644 index 9e4a2787f..000000000 --- a/parsec/mca/pins/task_granularity/ValidateModule.CMake +++ /dev/null @@ -1,8 +0,0 @@ -if (PARSEC_PROF_PINS) - SET(MCA_${COMPONENT}_${MODULE} ON) - FILE(GLOB MCA_${COMPONENT}_${MODULE}_SOURCES ${MCA_BASE_DIR}/${COMPONENT}/${MODULE}/[^\\.]*.c) - SET(MCA_${COMPONENT}_${MODULE}_CONSTRUCTOR "${COMPONENT}_${MODULE}_static_component") -else (PARSEC_PROF_PINS) - MESSAGE(STATUS "Module ${MODULE} not selectable: PINS disabled.") - SET(MCA_${COMPONENT}_${MODULE} OFF) -endif (PARSEC_PROF_PINS) diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity.h b/parsec/mca/pins/task_granularity/pins_task_granularity.h deleted file mode 100644 index 2e05d8f1d..000000000 --- a/parsec/mca/pins/task_granularity/pins_task_granularity.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef PINS_task_granularity_H -#define PINS_task_granularity_H - -#include "parsec/parsec_config.h" -#include "parsec/runtime.h" -#include "parsec/mca/mca.h" -#include "parsec/mca/pins/pins.h" - - -#define NUM_SELECT_EVENTS 2 -#define SYSTEM_QUEUE_VP -2 - -BEGIN_C_DECLS - -/** - * Globally exported variable - */ -PARSEC_DECLSPEC extern const parsec_pins_base_component_t parsec_pins_task_granularity_component; -PARSEC_DECLSPEC extern const parsec_pins_module_t parsec_pins_task_granularity_module; -/* static accessor */ -mca_base_component_t * pins_task_granularity_static_component(void); - -END_C_DECLS - -#endif diff --git 
a/parsec/mca/pins/task_granularity/pins_task_granularity_component.c b/parsec/mca/pins/task_granularity/pins_task_granularity_component.c deleted file mode 100644 index 665ebb876..000000000 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_component.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "parsec/parsec_config.h" -#include "parsec/runtime.h" - -#include "parsec/mca/pins/pins.h" -#include "parsec/mca/pins/task_granularity/pins_task_granularity.h" - -/* - * Local function - */ -static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -const parsec_pins_base_component_t parsec_pins_task_granularity_component = { - - /* First, the mca_component_t struct containing meta information - about the component itself */ - - { - PARSEC_PINS_BASE_VERSION_2_0_0, - - /* Component name and version */ - "task_granularity", - "", /* options */ - PARSEC_VERSION_MAJOR, - PARSEC_VERSION_MINOR, - - /* Component open and close functions */ - NULL, - NULL, - pins_task_granularity_component_query, - /*< specific query to return the module and add it to the list of available modules */ - NULL, - "", /*< no reserve */ - }, - { - /* The component has no metadata */ - MCA_BASE_METADATA_PARAM_NONE, - "", /*< no reserve */ - } -}; -mca_base_component_t * 
pins_task_granularity_static_component(void) -{ - return (mca_base_component_t *)&parsec_pins_task_granularity_component; -} - -static int pins_task_granularity_component_query(mca_base_module_t **module, int *priority) -{ - /* module type should be: const mca_base_module_t ** */ - void *ptr = (void*)&parsec_pins_task_granularity_module; - *priority = 6; - *module = (mca_base_module_t *)ptr; - return MCA_SUCCESS; -} - diff --git a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c b/parsec/mca/pins/task_granularity/pins_task_granularity_module.c deleted file mode 100644 index 8d8034650..000000000 --- a/parsec/mca/pins/task_granularity/pins_task_granularity_module.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2012-2020 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - */ - -#include -#include - -#include "parsec/parsec_config.h" -#include "pins_task_granularity.h" -#include "parsec/mca/pins/pins.h" -#include "parsec/utils/debug.h" -#include "parsec/execution_stream.h" -#include "parsec/profiling.h" -#include "parsec/parsec_internal.h" -#include "parsec/os-spec-timing.h" - -static void pins_init_task_granularity(parsec_context_t* master_context); -static void pins_fini_task_granularity(parsec_context_t* master_context); -static void pins_thread_init_task_granularity(parsec_execution_stream_t* es); -static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es); - -static FILE *file_ptr; -parsec_atomic_lock_t lock; - -int task_granularity_trace_keyin; -int task_granularity_trace_keyout; - -typedef struct task_characteristics_s -{ - int taskpool_id; - int task_class_id; - int nb_data_items; - int total_data_size; - int priority; - int chore_id; -} task_characteristics_t; - -const parsec_pins_module_t parsec_pins_task_granularity_module = { - &parsec_pins_task_granularity_component, - { - pins_init_task_granularity, - pins_fini_task_granularity, - NULL, - NULL, - 
pins_thread_init_task_granularity, - pins_thread_fini_task_granularity - }, - { NULL } -}; - - -static void start_task_granularity_record(parsec_execution_stream_t* es, - parsec_task_t* task, - parsec_pins_next_callback_t* data); - -static void stop_task_granularity_record(parsec_execution_stream_t* es, - parsec_task_t* task, - parsec_pins_next_callback_t* data); - - -static void pins_init_task_granularity(parsec_context_t* master) -{ - (void)master; - parsec_profiling_add_dictionary_keyword("TASK_GRANULARITY", "fill:#FF0000", - sizeof(task_characteristics_t), - "taskpool_id{int32_t};task_class_id{int32_t};nb_data_items{int32_t};total_data_size{int32_t};priority{int32_t};chore_id{int32_t}", - &task_granularity_trace_keyin, - &task_granularity_trace_keyout); - -} - -static void pins_fini_task_granularity(parsec_context_t* master) -{ - (void)master; -} - -static void pins_thread_init_task_granularity(parsec_execution_stream_t* es) -{ - parsec_pins_next_callback_t* event_cb; - - event_cb = (parsec_pins_next_callback_t*)malloc(sizeof(parsec_pins_next_callback_t)); - PARSEC_PINS_REGISTER(es, EXEC_BEGIN, start_task_granularity_record, event_cb); - event_cb = (parsec_pins_next_callback_t*)malloc(sizeof(parsec_pins_next_callback_t)); - PARSEC_PINS_REGISTER(es, EXEC_END, stop_task_granularity_record, event_cb); -} - -static void pins_thread_fini_task_granularity(parsec_execution_stream_t* es) -{ - task_characteristics_t characteristics; - parsec_pins_next_callback_t* event_cb; - - PARSEC_PINS_UNREGISTER(es, EXEC_BEGIN, start_task_granularity_record, &event_cb); - free(event_cb); - PARSEC_PINS_UNREGISTER(es, EXEC_END, stop_task_granularity_record, &event_cb); - free(event_cb); -} - -static void start_task_granularity_record(parsec_execution_stream_t* es, - struct parsec_task_s* task, - parsec_pins_next_callback_t* data) -{ - - task_characteristics_t characteristics; - - PARSEC_PROFILING_TRACE(es->es_profile, - task_granularity_trace_keyin, - 
task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL), - task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL), - (void*)&characteristics); -} - -int find_chore(parsec_execution_stream_t* es, parsec_task_t* task) -{ - const parsec_task_class_t* tc = task->task_class; - uint8_t chore_mask = task->chore_mask; - parsec_evaluate_function_t* eval; - unsigned int chore_id; - int rc; - - /* Find first bit in chore_mask that is not 0 */ - for(chore_id = 0; NULL != tc->incarnations[chore_id].hook; chore_id++) - if( 0 != (chore_mask & (1<incarnations[chore_id].evaluate) ) { - rc = eval(task); - if( PARSEC_HOOK_RETURN_DONE != rc ) { - if( PARSEC_HOOK_RETURN_NEXT != rc ) { - break; - } - goto next_chore; - } - } - - return chore_id; - - next_chore: - /* Mark this chore as tested */ - chore_mask &= ~( 1<incarnations[chore_id].hook; chore_id++) - if( 0 != (chore_mask & (1<incarnations[chore_id].hook); - - return PARSEC_HOOK_RETURN_ERROR; -} - - -int find_data_size(parsec_execution_stream_t* es, parsec_task_t* task) -{ - int i, total_data = 0, nb_elements = 0, size = 0; - struct parsec_data_copy_s* task_data; - - for(i = 0; i < task->task_class->nb_flows; i++) - { - task_data = task->data[i].data_in; - if(task_data == NULL) - task_data = task->data[i].data_out; - - if(task_data != NULL) - { - if(task_data->arena_chunk != NULL && task_data->arena_chunk->origin != NULL) - total_data += task_data->arena_chunk->count * task_data->arena_chunk->origin->elem_size; - else if(task_data->original != NULL) - total_data += task_data->original->nb_elts; - else - printf("SOMETHING IS WRONG Name %s Id %d \n", task->task_class->name, task->task_class->task_class_id); - - } - } - return total_data; -} - - - -static void stop_task_granularity_record(parsec_execution_stream_t* es, - parsec_task_t* task, - parsec_pins_next_callback_t* data) -{ - task_characteristics_t characteristics; - - 
characteristics.taskpool_id = task->taskpool->taskpool_id; - characteristics.task_class_id = task->task_class->task_class_id; - characteristics.nb_data_items = task->task_class->nb_parameters; - characteristics.total_data_size = find_data_size(es, task); - characteristics.priority = task->priority; - characteristics.chore_id = find_chore(es, task); - - PARSEC_PROFILING_TRACE(es->es_profile, - task_granularity_trace_keyout, - task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL), - task->task_class->key_functions->key_hash(task->task_class->make_key(task->taskpool, task->locals), NULL), - (void*)&characteristics); - -} - - From eb9a35b9a87e7e4d85cfc045620986f0ad621508 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 25 Aug 2022 23:08:14 -0400 Subject: [PATCH 175/215] patch: PR #426 https://github.com/ICLDisco/parsec/pull/426/files --- parsec/mca/device/cuda/device_cuda_module.c | 20 ++-- parsec/mca/device/device.c | 120 +++++++++++++++----- parsec/mca/device/device.h | 48 ++++++-- parsec/mca/device/device_gpu.c | 13 ++- parsec/mca/device/transfer_gpu.c | 2 +- 5 files changed, 158 insertions(+), 45 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index f620dc2ae..087aed56a 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -453,9 +453,9 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) device->type = PARSEC_DEV_CUDA; device->executed_tasks = 0; - device->transferred_data_in = 0; - device->d2d_transfer = 0; - device->transferred_data_out = 0; + device->data_in_array_size = parsec_device_cuda_enabled + 16; + device->data_in_from_device = (uint64_t*)calloc(device->data_in_array_size, sizeof(uint64_t)); + device->data_out_to_host = 0; device->required_data_in = 0; device->required_data_out = 0; @@ -608,6 +608,10 @@ parsec_cuda_module_fini(parsec_device_module_t* 
device) cuda_device->cuda_index = -1; + free(cuda_device->super.super.data_in_from_device); + cuda_device->super.super.data_in_from_device = NULL; + cuda_device->super.super.data_in_array_size = 0; + /* Cleanup the GPU memory. */ PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); @@ -1631,10 +1635,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, return -1; } - if( in_elem_dev->super.super.type != PARSEC_DEV_CUDA ) - gpu_device->super.transferred_data_in += nb_elts; - else - gpu_device->super.d2d_transfer += nb_elts; + parsec_device_check_statistic_array_size(&gpu_device->super, in_elem_dev->super.super.device_index); + gpu_device->super.data_in_from_device[in_elem_dev->super.super.device_index] += nb_elts; + + if( PARSEC_GPU_TASK_TYPE_KERNEL == gpu_task->task_type ) gpu_device->super.nb_data_faults += nb_elts; @@ -2522,7 +2526,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_unlock(&original->lock); goto release_and_return_error; } - gpu_device->super.transferred_data_out += nb_elts; /* TODO: not hardcoded, use datatype size */ + gpu_device->super.data_out_to_host += nb_elts; /* TODO: not hardcoded, use datatype size */ how_many++; } else { assert( 0 == gpu_copy->readers ); diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index ca756eebc..4fa7086d6 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -319,15 +319,16 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex { int *device_counter, total = 0; uint64_t total_data_in = 0, total_data_out = 0; - uint64_t total_required_in = 0, total_required_out = 0; + uint64_t total_required_in = 0, total_required_out = 0, total_d2d = 0; uint64_t *transferred_in, *transferred_out; uint64_t *required_in, *required_out; + uint64_t *d2d, d2dtmp; float gtotal = 0.0; - float best_data_in, best_data_out; + float best_data_in, best_data_out, best_d2d; 
float best_required_in, best_required_out; - char *data_in_unit, *data_out_unit; + char *data_in_unit, *data_out_unit, *d2d_unit; char *required_in_unit, *required_out_unit; - char percent1[64], percent2[64]; + char percent1[64], percent2[64], percent3[64]; parsec_device_module_t *device; uint32_t i; @@ -337,6 +338,7 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex transferred_out = (uint64_t*)calloc(parsec_nb_devices, sizeof(uint64_t)); required_in = (uint64_t*)calloc(parsec_nb_devices, sizeof(uint64_t)); required_out = (uint64_t*)calloc(parsec_nb_devices, sizeof(uint64_t)); + d2d = (uint64_t*)calloc(parsec_nb_devices, sizeof(uint64_t)); /** * Save the statistics locally. @@ -346,22 +348,23 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex assert( i == device->device_index ); /* Save the statistics */ device_counter[device->device_index] += device->executed_tasks; - transferred_in[device->device_index] += device->transferred_data_in; - transferred_out[device->device_index] += device->transferred_data_out; + transferred_in[device->device_index] += device->data_in_from_device[0]; + total_data_in += device->data_in_from_device[0]; + transferred_out[device->device_index] += device->data_out_to_host; required_in[device->device_index] += device->required_data_in; required_out[device->device_index] += device->required_data_out; + d2dtmp = 0; + for(unsigned int j = 1; j < device->data_in_array_size; j++) { + d2dtmp += device->data_in_from_device[i]; + } + d2d[device->device_index] += d2dtmp; /* Update the context-level statistics */ total += device->executed_tasks; - total_data_in += device->transferred_data_in; - total_data_out += device->transferred_data_out; + total_data_in += d2dtmp; + total_data_out += device->data_out_to_host; total_required_in += device->required_data_in; total_required_out += device->required_data_out; - - device->executed_tasks = 0; - device->transferred_data_in = 0; - 
device->transferred_data_out = 0; - device->required_data_in = 0; - device->required_data_out = 0; + total_d2d += d2dtmp; } /* Print statistics */ @@ -369,11 +372,11 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex if( 0 == total_data_out ) total_data_out = 1; gtotal = (float)total; - printf("--------------------------------------------------------------------------------------------------\n"); - printf("| | | Data In | Data Out |\n"); - printf("|Rank %3d | # KERNEL | %% | Required | Transfered(%%) | Required | Transfered(%%) |\n", + printf("+----------------------------------------------------------------------------------------------------------------------------+\n"); + printf("| | | Data In | Data Out |\n"); + printf("|Rank %3d | # KERNEL | %% | Required | Transfered H2D(%%) | Transfered D2D(%%) | Required | Transfered(%%) |\n", (NULL == parsec_context ? parsec_debug_rank : parsec_context->my_rank)); - printf("|---------|-----------|--------|------------|-------------------|------------|-------------------|\n"); + printf("|---------|-----------|--------|------------|-----------------------|-----------------------|------------|-------------------|\n"); for( i = 0; i < parsec_nb_devices; i++ ) { if( NULL == (device = parsec_devices[i]) ) continue; @@ -381,11 +384,15 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex parsec_compute_best_unit( required_out[i], &best_required_out, &required_out_unit ); parsec_compute_best_unit( transferred_in[i], &best_data_in, &data_in_unit ); parsec_compute_best_unit( transferred_out[i], &best_data_out, &data_out_unit ); + parsec_compute_best_unit( total_d2d, &best_d2d, &d2d_unit ); + parsec_compute_best_unit( d2d[i], &best_d2d, &d2d_unit ); - printf("| Dev %2d |%10d | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) | %s\n", + printf("| Dev %2d |%10d | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) | %s\n", 
device->device_index, device_counter[i], (device_counter[i]/gtotal)*100.00, best_required_in, required_in_unit, best_data_in, data_in_unit, (((double)transferred_in[i]) / (double)required_in[i] ) * 100.0, + best_d2d, d2d_unit, + (((double)d2d[i])/ (double)required_in[i]) * 100.0, best_required_out, required_out_unit, best_data_out, data_out_unit, (((double)transferred_out[i]) / (double)required_out[i]) * 100.0, device->name ); } @@ -399,27 +406,77 @@ void parsec_mca_device_dump_and_reset_statistics(parsec_context_t* parsec_contex if( 0 == total_required_in ) { snprintf(percent1, 64, "nan"); + snprintf(percent2, 64, "nan"); } else { snprintf(percent1, 64, "%5.2f", ((double)total_data_in / (double)total_required_in ) * 100.0); + snprintf(percent2, 64, "%5.2f", ((double)total_d2d / (double)total_required_in) * 100.0); } if( 0 == total_required_out ) { - snprintf(percent2, 64, "nan"); + snprintf(percent3, 64, "nan"); } else { - snprintf(percent2, 64, "%5.2f", ((double)total_data_out / (double)total_required_out) * 100.0); + snprintf(percent3, 64, "%5.2f", ((double)total_data_out / (double)total_required_out) * 100.0); } - printf("|All Devs |%10d | %5.2f | %8.2f%2s | %8.2f%2s(%s) | %8.2f%2s | %8.2f%2s(%s) |\n", + printf("|All Devs |%10d | %5.2f | %8.2f%2s | %8.2f%2s(%s) | %8.2f%2s(%s) | %8.2f%2s | %8.2f%2s(%s) |\n", total, (total/gtotal)*100.00, - best_required_in, required_in_unit, best_data_in, data_in_unit, - percent1, - best_required_out, required_out_unit, best_data_out, data_out_unit, - percent2); - printf("-------------------------------------------------------------------------------------------------\n"); + best_required_in, required_in_unit, best_data_in, data_in_unit, percent1, + best_d2d, d2d_unit, percent2, + best_required_out, required_out_unit, best_data_out, data_out_unit, percent3); + printf("+----------------------------------------------------------------------------------------------------------------------------+\n"); + + + printf("\n" + "Full 
transfer matrix:\n" + "dst\\src "); + for(i = 0; i < parsec_nb_devices; i++) { + if(NULL == parsec_devices[i]) continue; + printf("%10d ", i); + } + printf("\n"); + // 0 is stored in the other devices, because they push to 0, 0 doesn't pull data. + printf(" %3d - ", 0); + for(i = 1; i < parsec_nb_devices; i++) { + if( NULL == (device = parsec_devices[i]) ) continue; + assert( i == device->device_index ); + parsec_compute_best_unit(device->data_out_to_host, &best_d2d, &d2d_unit); + printf(" %8.2f%2s", best_d2d, d2d_unit); + } + printf("\n"); + // The other devices pull data, and they have counted locally how much + for(i = 1; i < parsec_nb_devices; i++) { + if( NULL == (device = parsec_devices[i]) ) continue; + assert( i == device->device_index ); + printf(" %3d ", i); + for(unsigned int j = 0; j < parsec_nb_devices; j++) { + if( device->data_in_array_size ) { + d2dtmp = device->data_in_from_device[j]; + } else { + d2dtmp = 0; + } + parsec_compute_best_unit( d2dtmp, &best_d2d, &d2d_unit); + if(i!=j) printf(" %8.2f%2s", best_d2d, d2d_unit); + else printf(" - "); + } + printf("\n"); + } free(device_counter); free(transferred_in); free(transferred_out); free(required_in); free(required_out); + + /** + * Reset the statistics for next turn if there is one. 
+ */ + for(i = 0; i < parsec_nb_devices; i++) { + if( NULL == (device = parsec_devices[i]) ) continue; + assert( i == device->device_index ); + device->executed_tasks = 0; + memset(device->data_in_from_device, 0, sizeof(uint64_t)*device->data_in_array_size); + device->data_out_to_host = 0; + device->required_data_in = 0; + device->required_data_out = 0; + } } int parsec_mca_device_fini(void) @@ -808,6 +865,8 @@ int parsec_mca_device_attach(parsec_context_t* context) parsec_device_cpus = (parsec_device_module_t*)calloc(1, sizeof(parsec_device_module_t)); parsec_device_cpus->name = "default"; parsec_device_cpus->type = PARSEC_DEV_CPU; + parsec_device_cpus->data_in_from_device = (uint64_t*)calloc(2, sizeof(uint64_t)); + parsec_device_cpus->data_in_array_size = 2; cpu_weights(parsec_device_cpus, nb_total_comp_threads); parsec_device_cpus->taskpool_register = device_taskpool_register_static; parsec_mca_device_add(context, parsec_device_cpus); @@ -818,6 +877,8 @@ int parsec_mca_device_attach(parsec_context_t* context) parsec_device_recursive = (parsec_device_module_t*)calloc(1, sizeof(parsec_device_module_t)); parsec_device_recursive->name = "recursive"; parsec_device_recursive->type = PARSEC_DEV_RECURSIVE; + parsec_device_recursive->data_in_from_device = (uint64_t*)calloc(2, sizeof(uint64_t)); + parsec_device_recursive->data_in_array_size = 2; parsec_device_recursive->device_hweight = parsec_device_cpus->device_hweight; parsec_device_recursive->device_tweight = parsec_device_cpus->device_tweight; parsec_device_recursive->device_sweight = parsec_device_cpus->device_sweight; @@ -897,6 +958,11 @@ int parsec_mca_device_remove(parsec_device_module_t* device) parsec_devices[device->device_index] = NULL; device->context = NULL; device->device_index = -1; + if(NULL != device->data_in_from_device) { + free(device->data_in_from_device); + device->data_in_from_device = NULL; + device->data_in_array_size = 0; + } unlock_and_return_rc: parsec_atomic_unlock(&parsec_devices_mutex); /* 
CRITICAL SECTION: END */ return rc; diff --git a/parsec/mca/device/device.h b/parsec/mca/device/device.h index cca9fca08..d34daa077 100644 --- a/parsec/mca/device/device.h +++ b/parsec/mca/device/device.h @@ -42,6 +42,8 @@ #include "parsec/mca/mca.h" #include "parsec/class/info.h" +#include + BEGIN_C_DECLS typedef struct parsec_device_module_s parsec_device_module_t; @@ -125,13 +127,12 @@ struct parsec_device_module_s { parsec_info_object_array_t infos; /**< Per-device info objects are stored here */ struct parsec_context_s* context; /**< The PaRSEC context this device belongs too */ char* name; /**< Simple identified for the device */ - uint64_t transferred_data_in; - uint64_t transferred_data_out; - uint64_t d2d_transfer; - uint64_t required_data_in; - uint64_t required_data_out; - uint64_t executed_tasks; - uint64_t nb_data_faults; + uint64_t *data_in_from_device; /**< One counter per device: how many bytes have been copied from this device */ + uint64_t data_out_to_host; /**< When a device writes back to the host, it counts it here, to avoid the need for an atomic operation on the data_in_from_device of the host device */ + uint64_t required_data_in; + uint64_t required_data_out; + uint64_t executed_tasks; + uint64_t nb_data_faults; float device_hweight; /**< Number of half precision operations per second */ float device_sweight; /**< Number of single precision operations per second */ float device_dweight; /**< Number of double precision operations per second */ @@ -139,6 +140,7 @@ struct parsec_device_module_s { #if defined(PARSEC_PROF_TRACE) parsec_profiling_stream_t *profiling; #endif /* defined(PROFILING) */ + uint8_t data_in_array_size; /**< Current size of the data_in_from_device array. 
*/ uint8_t device_index; uint8_t type; }; @@ -282,6 +284,38 @@ parsec_device_find_function(const char* function_name, MCA_BASE_VERSION_2_0_0, \ "device", 2, 0, 0 +/** + * @brief Ensures that the data_in_from_device, which is used to collect statistics on + * the amount of data transferred between devices, is large enough to store this device. + * + * @remark index should always be < device->data_in_array_size. However, since devices are + * discovered dynamically, it's possible we initialized some structures too small. When this + * happens, this function ensures that we don't segfault because of a statistics array. + * + * @details + * @p device: the device holding the statistics array + * @p index: the index in this statistics array that will be updated + * + */ + static inline void parsec_device_check_statistic_array_size(parsec_device_module_t *device, uint8_t index) + { + assert( index < 255 ); + while(index >= device->data_in_array_size) { + uint8_t os = device->data_in_array_size; + uint8_t ns = index+1; + uint64_t *o = device->data_in_from_device; + uint64_t *n = (uint64_t*)calloc(ns, sizeof(uint64_t)); + memcpy(n, o, os*sizeof(uint64_t)); + if( parsec_atomic_cas_ptr(&device->data_in_from_device, o, n) ) { + device->data_in_array_size = ns; + free(o); + break; + } else { + free(n); + } + } + } + /** @} */ END_C_DECLS diff --git a/parsec/mca/device/device_gpu.c b/parsec/mca/device/device_gpu.c index ec2059c47..132ee1caa 100644 --- a/parsec/mca/device/device_gpu.c +++ b/parsec/mca/device/device_gpu.c @@ -304,6 +304,15 @@ void dump_exec_stream(parsec_gpu_exec_stream_t* exec_stream) void dump_GPU_state(parsec_device_gpu_module_t* gpu_device) { int i; + uint64_t data_in = 0, data_in_host = 0, data_in_dev = 0; + + for(int i = 0; i < gpu_device->super.data_in_array_size; i++) { + data_in += gpu_device->super.data_in_from_device[i]; + if(i == 0) + data_in_host += gpu_device->super.data_in_from_device[i]; + else + data_in_dev += 
gpu_device->super.data_in_from_device[i]; + } parsec_output(parsec_gpu_output_stream, "\n\n"); parsec_output(parsec_gpu_output_stream, "Device %d:%d (%p) epoch\n", gpu_device->super.device_index, @@ -311,8 +320,8 @@ void dump_GPU_state(parsec_device_gpu_module_t* gpu_device) parsec_output(parsec_gpu_output_stream, "\tpeer mask %x executed tasks with %llu streams %d\n", gpu_device->peer_access_mask, (unsigned long long)gpu_device->super.executed_tasks, gpu_device->num_exec_streams); parsec_output(parsec_gpu_output_stream, "\tstats transferred [in: %llu from host %llu from other device out: %llu] required [in: %llu out: %llu]\n", - (unsigned long long)gpu_device->super.transferred_data_in, (unsigned long long)gpu_device->super.d2d_transfer, - (unsigned long long)gpu_device->super.transferred_data_out, + (unsigned long long)data_in_host, (unsigned long long)data_in_dev, + (unsigned long long)gpu_device->super.data_out_to_host, (unsigned long long)gpu_device->super.required_data_in, (unsigned long long)gpu_device->super.required_data_out); for( i = 0; i < gpu_device->num_exec_streams; i++ ) { dump_exec_stream(gpu_device->exec_stream[i]); diff --git a/parsec/mca/device/transfer_gpu.c b/parsec/mca/device/transfer_gpu.c index 521417426..775199b08 100644 --- a/parsec/mca/device/transfer_gpu.c +++ b/parsec/mca/device/transfer_gpu.c @@ -299,7 +299,7 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, //gpu_copy->readers--; PARSEC_DATA_COPY_DEC_READERS(gpu_copy); gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - gpu_device->super.transferred_data_out += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ + gpu_device->super.data_out_to_host += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ assert(gpu_copy->readers >= 0); original = gpu_copy->original; From 968d9d3e2501546472650ad6b987287091c6e15f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 22 Sep 2022 23:37:53 -0400 
Subject: [PATCH 176/215] tracing extended --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 28 ++++-- parsec/mca/device/cuda/device_cuda_module.c | 95 ++++++++++++++++---- parsec/mca/device/device_gpu.h | 13 ++- 4 files changed, 108 insertions(+), 30 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index cc7ba92bc..d59a03edc 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -36,7 +36,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "first_queue_time{double};select_time{double};second_queue_time{double};exec_time{double};stage_in_time{double};device_index{int32_t};task_count{int32_t};waiting_tasks{int32_t};type{int32_t}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index da2eb7e60..6577f4414 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -61,15 +61,25 @@ typedef struct task_mapping_item_s typedef struct gpu_dev_prof_s { - double first_queue_time; - double select_time; - double second_queue_time; - double exec_time; - double stage_in_time; - int32_t device_index; - int32_t task_count; - int32_t waiting_tasks; - int32_t type; + double 
first_queue_time; + double select_time; + double second_queue_time; + double exec_time_start; + double exec_time_end; + double first_stage_in_time_start; + double sec_stage_in_time_start; + double first_stage_in_time_end; + double sec_stage_in_time_end; + double stage_out_time_start; + double stage_out_time_end; + double complete_time; + double device_index; + double task_count; + double waiting_tasks; + double mig_status; + double nb_first_stage_in; + double nb_sec_stage_in; + double task_type; } gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 087aed56a..bddf3cd4d 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1542,6 +1542,13 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; + #if defined(PARSEC_PROF_TRACE) + if (gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN) + gpu_task->nb_first_stage_in++; + else + gpu_task->nb_sec_stage_in++; + #endif + /* If it is already under transfer, don't schedule the transfer again. * This happens if the task refers twice (or more) to the same input flow */ if( gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_UNDER_TRANSFER ) { @@ -2189,15 +2196,6 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, assert( NULL == stream->tasks[stream->start] ); -#if defined(PARSEC_PROF_TRACE) - if(NULL != task && task->migrate_status != TASK_NOT_MIGRATED) - { - if( gpu_device->exec_stream[0] == stream ) //stage_in time - task->stage_in_time = MPI_Wtime(); - else if( gpu_device->exec_stream[1] != stream) //execution time - task->exec_time = MPI_Wtime(); - } -#endif /** * In case the task is succesfully progressed, the corresponding profiling * event is triggered. 
@@ -2227,7 +2225,20 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, } #endif /* defined(PARSEC_DEBUG_PARANOID) */ } + +#if defined(PARSEC_PROF_TRACE) + if(task != NULL) + task->exec_time_start = MPI_Wtime(); // start task execution +#endif + rc = progress_fct( gpu_device, task, stream ); + +#if defined(PARSEC_PROF_TRACE) + if(task != NULL) + task->exec_time_end = MPI_Wtime(); // start task execution +#endif + + if( 0 > rc ) { if( PARSEC_HOOK_RETURN_AGAIN != rc && PARSEC_HOOK_RETURN_ASYNC != rc ) { @@ -2290,6 +2301,15 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, char tmp[MAX_TASK_STRLEN]; #endif +#if defined(PARSEC_PROF_TRACE) + + if( gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time + gpu_task->first_stage_in_time_start = MPI_Wtime(); + else //second stage_in start time + gpu_task->sec_stage_in_time_start = MPI_Wtime(); + +#endif + #if 0 if( gpu_task->last_data_check_epoch == gpu_device->data_avail_epoch ) return PARSEC_HOOK_RETURN_AGAIN; @@ -2357,6 +2377,15 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } +#if defined(PARSEC_PROF_TRACE) + + if( gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in complete time + gpu_task->first_stage_in_time_end = MPI_Wtime(); + else //second stage_in complete time + gpu_task->sec_stage_in_time_end = MPI_Wtime(); + +#endif + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", gpu_device->super.name, @@ -2390,6 +2419,10 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, char tmp[MAX_TASK_STRLEN]; #endif +#if defined(PARSEC_PROF_TRACE) + gpu_task->stage_out_time_start = MPI_Wtime(); +#endif + if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_D2HTRANSFER) { for( i = 0; i < this_task->locals[0].value; i++ ) { gpu_copy = this_task->data[i].data_out; @@ -2536,6 +2569,10 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_unlock(&original->lock); } +#if 
defined(PARSEC_PROF_TRACE) + gpu_task->stage_out_time_end = MPI_Wtime(); +#endif + release_and_return_error: if( update_data_epoch ) { gpu_device->data_avail_epoch++; @@ -2792,7 +2829,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #if defined(PARSEC_PROF_TRACE) if(gpu_task->migrate_status == TASK_NOT_MIGRATED) + { gpu_task->first_queue_time = MPI_Wtime(); + gpu_task->nb_first_stage_in = 0; + gpu_task->nb_sec_stage_in = 0; + } else { gpu_task->second_queue_time = MPI_Wtime(); @@ -2908,6 +2949,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( -1 == rc ) goto disable_gpu; } + if( NULL != progress_task ) { /* We have a succesfully completed task. However, it is not gpu_task, as * it was just submitted into the data retrieval system. Instead, the task @@ -2978,6 +3020,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_device->super.executed_tasks++; __parsec_complete_execution( es, gpu_task->ec ); +#if defined(PARSEC_PROF_TRACE) + if(gpu_task != NULL) + gpu_task->complete_time = MPI_Wtime(); +#endif + /** * @brief For tasks migrated after stage_ in, during the first stage_in * we would have increased the refcount of the data_in. 
If the task was not @@ -2995,15 +3042,27 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #if defined(PARSEC_PROF_TRACE) gpu_dev_prof_t prof_info; - prof_info.device_index = gpu_device->super.device_index; - prof_info.task_count = gpu_device->mutex; - prof_info.first_queue_time = gpu_task->first_queue_time; - prof_info.select_time = gpu_task->select_time; - prof_info.second_queue_time = gpu_task->second_queue_time; - prof_info.exec_time = gpu_task->exec_time; - prof_info.stage_in_time = gpu_task->stage_in_time; - prof_info.waiting_tasks = gpu_task->waiting_tasks; - prof_info.type = gpu_task->migrate_status; + + prof_info.task_type = gpu_task->task_type; + prof_info.device_index = gpu_device->super.device_index; + prof_info.task_count = gpu_device->mutex; + prof_info.first_queue_time = gpu_task->first_queue_time; + prof_info.select_time = gpu_task->select_time; + prof_info.second_queue_time = gpu_task->second_queue_time; + prof_info.exec_time_start = gpu_task->exec_time_start; + prof_info.exec_time_end = gpu_task->exec_time_end; + prof_info.complete_time = gpu_task->complete_time; + prof_info.first_stage_in_time_start = gpu_task->first_stage_in_time_start; + prof_info.sec_stage_in_time_start = gpu_task->sec_stage_in_time_start; + prof_info.first_stage_in_time_end = gpu_task->first_stage_in_time_end; + prof_info.sec_stage_in_time_end = gpu_task->sec_stage_in_time_end; + prof_info.stage_out_time_start = gpu_task->stage_out_time_start; + prof_info.stage_out_time_end = gpu_task->stage_out_time_end; + prof_info.waiting_tasks = gpu_task->waiting_tasks; + prof_info.mig_status = gpu_task->migrate_status; + prof_info.nb_first_stage_in = gpu_task->nb_first_stage_in; + prof_info.nb_sec_stage_in = gpu_task->nb_sec_stage_in; + parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), diff --git 
a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 47dcb1b9c..e8e460bc1 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -97,9 +97,18 @@ struct parsec_gpu_task_s { double first_queue_time; double select_time; double second_queue_time; - double stage_in_time; - double exec_time; + double first_stage_in_time_start; + double sec_stage_in_time_start; + double first_stage_in_time_end; + double sec_stage_in_time_end; + double exec_time_start; + double exec_time_end; + double stage_out_time_start; + double stage_out_time_end; + double complete_time; int32_t waiting_tasks; + int32_t nb_first_stage_in; + int32_t nb_sec_stage_in; #endif union { struct { From 3ccf91793c78bf9eac1f68c6f90fb0a838b85587 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 26 Sep 2022 07:30:00 -0400 Subject: [PATCH 177/215] single pass implemented --- parsec/mca/device/cuda/device_cuda_migrate.c | 417 ++++++++++--------- 1 file changed, 222 insertions(+), 195 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d59a03edc..a069db711 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -2,7 +2,7 @@ extern int parsec_device_cuda_enabled; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) -extern int parsec_migrate_statistics; +extern int parsec_migrate_statistics; parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; // list of all migrated task @@ -35,9 +35,9 @@ static void task_mapping_ht_free_elt(void *_item, void *table) static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", - sizeof(gpu_dev_prof_t), - 
"first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", - &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); + sizeof(gpu_dev_prof_t), + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", + &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } /** @@ -59,18 +59,19 @@ int parsec_cuda_migrate_init(int ndevices) for (i = 0; i < NDEVICES; i++) { - for (j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; - device_info[i].load = 0; - device_info[i].level0 = 0; - device_info[i].level1 = 0; - device_info[i].level2 = 0; + for (j = 0; j < EXECUTION_LEVEL; j++) + device_info[i].task_count[j] = 0; + device_info[i].load = 0; + device_info[i].level0 = 0; + device_info[i].level1 = 0; + device_info[i].level2 = 0; device_info[i].total_tasks_executed = 0; - device_info[i].received = 0; - device_info[i].last_device = i; - device_info[i].deal_count = 0; - device_info[i].success_count = 0; - device_info[i].ready_compute_tasks = 0; - device_info[i].affinity_count = 0; + device_info[i].received = 0; + device_info[i].last_device = i; + device_info[i].deal_count = 0; + device_info[i].success_count = 0; + 
device_info[i].ready_compute_tasks = 0; + device_info[i].affinity_count = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -93,7 +94,6 @@ int parsec_cuda_migrate_fini() int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; - end = MPI_Wtime(); #if defined(PARSEC_HAVE_CUDA) @@ -105,7 +105,7 @@ int parsec_cuda_migrate_fini() PARSEC_OBJ_RELEASE(task_mapping_ht); task_mapping_ht = NULL; - if(parsec_migrate_statistics) + if (parsec_migrate_statistics) { for (i = 0; i < NDEVICES; i++) { @@ -128,7 +128,7 @@ int parsec_cuda_migrate_fini() device_info[i].level0, device_info[i].level1, device_info[i].level2, tot_task_migrated); printf("Tasks with affinity migrated : %d \n", device_info[i].affinity_count); - printf("Perc of affinity tasks : %lf \n", ( (float) device_info[i].affinity_count/ tot_task_migrated) * 100); + printf("Perc of affinity tasks : %lf \n", ((float)device_info[i].affinity_count / tot_task_migrated) * 100); summary_affinity += device_info[i].affinity_count; printf("Task received : %d \n", device_info[i].received); printf("Chunk Size : %d \n", parsec_cuda_migrate_chunk_size); @@ -146,10 +146,10 @@ int parsec_cuda_migrate_fini() printf("Total compute tasks executed : %d \n", summary_total_compute_tasks_executed); printf("Perc of compute tasks : %lf \n", ((float)summary_total_compute_tasks_executed / summary_total_tasks_executed) * 100); printf("Tasks migrated : level0 %d, level1 %d, level2 %d (Total %d)\n", - summary_total_l0_tasks_migrated, summary_total_l1_tasks_migrated, summary_total_l2_tasks_migrated, - summary_total_tasks_migrated); - printf("Tasks with affinity migrated : %d \n", summary_affinity); - printf("Perc of affinity tasks : %lf \n", ( (float) summary_affinity / summary_total_tasks_migrated) * 100); + summary_total_l0_tasks_migrated, summary_total_l1_tasks_migrated, summary_total_l2_tasks_migrated, + 
summary_total_tasks_migrated); + printf("Tasks with affinity migrated : %d \n", summary_affinity); + printf("Perc of affinity tasks : %lf \n", ((float)summary_affinity / summary_total_tasks_migrated) * 100); printf("Total deals : %d \n", summary_deals); printf("Successful deals : %d \n", summary_successful_deals); @@ -160,7 +160,6 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per deal : %lf \n", summary_avg_task_migrated); printf("Avg task migrated per successfull deal : %lf \n", summary_avg_task_migrated_per_sucess); printf("perc of successfull deals : %lf \n", summary_deal_success_perc); - } printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); @@ -233,11 +232,11 @@ int is_starving(int device) * starvtion if the number of ready tasks available is less than twice the * number of execution stream. */ - parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); + parsec_device_gpu_module_t *d = parsec_mca_device_get(DEVICE_NUM(device)); return (d->mutex < 5) ? 1 : 0; - //return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; - //return (get_compute_tasks_executed(device) < 5) ? 1 : 0; + // return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; + // return (get_compute_tasks_executed(device) < 5) ? 1 : 0; } int will_starve(int device) @@ -247,12 +246,11 @@ int will_starve(int device) * starvtion if migrating a task will push the number of ready tasks available * to less than twice the number of execution stream. */ - //parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); - //return (d->mutex < 5) ? 1 : 0; + // parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); + // return (d->mutex < 5) ? 1 : 0; - //return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; + // return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; return (get_compute_tasks_executed(device) < 5) ? 
1 : 0; - } /** @@ -352,6 +350,150 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t return 0; } +int single_pass_selection(parsec_execution_stream_t *es, parsec_list_t *ring, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device) +{ + int starving_device_index = -1, dealer_device_index = 0; + int nb_migrated = 0, execution_level = 0; + int deal_success = 0, device_affinity = 0; + int i = 0, j = 0, k = 0, d = 0; + parsec_gpu_task_t *migrated_gpu_task = NULL; + migrated_task_t *mig_task = NULL; + + dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); + starving_device_index = CUDA_DEVICE_NUM(starving_device->super.device_index); + + for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) + { + migrated_gpu_task = NULL; + execution_level = select_task_from_device_queues(es, dealer_device, &migrated_gpu_task); + + if (migrated_gpu_task != NULL) + { + assert(migrated_gpu_task->ec != NULL); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + + // keep track of compute task count. Decrement compute task count. 
+ dec_compute_task_count(dealer_device_index); + + if (parsec_migrate_statistics) + { + if (execution_level == 0) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; + } + if (execution_level == 1) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; + } + if (execution_level == 2) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; + } + } + + deal_success++; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); + + if (parsec_migrate_statistics) + { + if (execution_level == 2) + device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + else + device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + + if (device_affinity) + device_info[dealer_device_index].affinity_count++; + } + + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. + */ + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + + parsec_list_push_front(ring, migrated_gpu_task); + } + } // end for i + + return deal_success; +} + +int select_task_from_device_queues(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_gpu_task_t **migrated_gpu_task) +{ + int j = 0; + int execution_level = 0; + *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(&(dealer_device->pending)); // level 0 + + if (*migrated_gpu_task != NULL) + { + /** + * @brief if the task is a not a computational kerenel or if it is a task that has + * already been migrated, we stop the migration and push it back to the queue. 
+ */ + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN) + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + { + parsec_list_push_back(&(dealer_device->pending), *migrated_gpu_task); + *migrated_gpu_task = NULL; + } + + execution_level = 0; + } + + if (*migrated_gpu_task == NULL) + { + // level1 - task is aavailble in the stage_in queue. Stage_in not started. + *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 + + if (*migrated_gpu_task != NULL) + { + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + { + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, *migrated_gpu_task); + *migrated_gpu_task = NULL; + } + } + execution_level = 1; + + if (*migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 + + if (*migrated_gpu_task != NULL) + { + // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + { + parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, *migrated_gpu_task); + *migrated_gpu_task = NULL; + } + } + + if (*migrated_gpu_task != NULL) + { + execution_level = 2; + 
break; + } + } + } // end of j + } + + return execution_level; +} + /** * @brief check if there are any devices starving. If there are any starving device migrate * task from the dealer device to the starving device. @@ -364,8 +506,8 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device) { int starving_device_index = -1, dealer_device_index = 0; - int nb_migrated = 0, execution_level = 0, stream_index = 0; - int deal_success = 0, device_affinity = 0; + int nb_migrated = 0, execution_level = 0; + int deal_success = 0, device_affinity = 0; int i = 0, j = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; @@ -387,163 +529,49 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ device_info[dealer_device_index].deal_count++; deal_success = 0; - for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) - { - /** - * @brief Tasks are searched in different levels one by one. At this point we assume - * that the cost of migration increases, as the level increase. - */ - - //level 0 - task is just pushed to the device queue - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(&(dealer_device->pending)); // level 0 - - if ( migrated_gpu_task != NULL) - { - /** - * @brief if the task is a not a computational kerenel or if it is a task that has - * already been migrated, we stop the migration and push it back to the queue. 
- */ - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + parsec_list_t *ring = PARSEC_OBJ_NEW(parsec_list_t); + PARSEC_OBJ_RETAIN(ring); - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) - { - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; - } + deal_success = single_pass_selection(es, ring, dealer_device, starving_device); - } - execution_level = 0; - - if (migrated_gpu_task == NULL) - { - // level1 - task is aavailble in the stage_in queue. Stage_in not started. - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 - - if ( migrated_gpu_task != NULL) - { - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */ ) - { - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; - } - - } - execution_level = 1; - - if (migrated_gpu_task == NULL) - { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) - { - // level2 - task is available in one of the execution queue stage_in is complete - migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 - - if ( migrated_gpu_task != NULL) - { - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - - if (migrated_gpu_task->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || migrated_gpu_task->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */ ) - { - 
parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, (parsec_list_item_t *)migrated_gpu_task); - migrated_gpu_task = NULL; - } - - } - - if (migrated_gpu_task != NULL) - { - execution_level = 2; - stream_index = 2 + j; - break; - } - } - } //end of j - } - - - if (migrated_gpu_task != NULL) - { - assert(migrated_gpu_task->ec != NULL); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); + nb_migrated += deal_success; - // keep track of compute task count. Decrement compute task count. - dec_compute_task_count( dealer_device_index ); - - if(parsec_migrate_statistics) - { - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; - } - } - nb_migrated++; - deal_success++; - parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - - if(parsec_migrate_statistics) - { - if (execution_level == 2) - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - else - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + while (!parsec_list_nolock_is_empty(ring)) + { + migrated_gpu_task = parsec_list_pop_front(ring); + assert(migrated_gpu_task != NULL); - if (device_affinity) - device_info[dealer_device_index].affinity_count++; - } + /** + * @brief An object of type migrated_task_t is created store the migrated task + * and other associated details. This object is enqueued to a node level queue. 
+ * The main objective of this was to make sure that the manager does not have to sepend + * time on migration. It can select the task for migration, enqueue it to the node level + * queue and then return to its normal working. + */ + mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); + PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); + + mig_task->gpu_task = migrated_gpu_task; + for (k = 0; k < MAX_PARAM_COUNT; k++) + migrated_gpu_task->candidate[i] = NULL; + mig_task->dealer_device = dealer_device; + mig_task->starving_device = starving_device; + mig_task->stage_in_status = migrated_gpu_task->migrate_status; +#if defined(PARSEC_PROF_TRACE) + migrated_gpu_task->select_time = MPI_Wtime(); +#endif - /** - * @brief change migrate_status according to the status of the stage in of the - * stage_in data. - */ - if (execution_level == 2) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; - else - migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; - /** - * @brief An object of type migrated_task_t is created store the migrated task - * and other associated details. This object is enqueued to a node level queue. - * The main objective of this was to make sure that the manager does not have to sepend - * time on migration. It can select the task for migration, enqueue it to the node level - * queue and then return to its normal working. - */ - mig_task = (migrated_task_t *)calloc(1, sizeof(migrated_task_t)); - PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); - - mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; - mig_task->dealer_device = dealer_device; - mig_task->starving_device = starving_device; - mig_task->stage_in_status = (execution_level == 2) ? 
TASK_MIGRATED_AFTER_STAGE_IN : TASK_MIGRATED_BEFORE_STAGE_IN; - - #if defined(PARSEC_PROF_TRACE) - migrated_gpu_task->select_time = MPI_Wtime(); - #endif - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); - parsec_cuda_mig_task_enqueue(es, mig_task); - device_info[dealer_device_index].last_device = starving_device_index; - char tmp[MAX_TASK_STRLEN]; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d: nb_migrated %d", - parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), - execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index, nb_migrated); - } + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); + parsec_cuda_mig_task_enqueue(es, mig_task); - if (will_starve(dealer_device_index)) - break; - } // end for i + device_info[dealer_device_index].last_device = starving_device_index; + char tmp[MAX_TASK_STRLEN]; + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d", + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), + execution_level, mig_task->stage_in_status, dealer_device_index, starving_device_index); + } // end while - if(deal_success > 0) + if (deal_success > 0) device_info[dealer_device_index].success_count++; if (will_starve(dealer_device_index)) @@ -674,7 +702,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); PARSEC_OBJ_RELEASE(task->data[i].data_out); @@ -682,7 +710,7 @@ int change_task_features(parsec_gpu_task_t 
*gpu_task, parsec_device_gpu_module_t } parsec_atomic_unlock(&original->lock); - + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, @@ -779,7 +807,7 @@ void clear_task_migrated_per_tp() void print_task_migrated_per_tp() { - if(parsec_migrate_statistics) + if (parsec_migrate_statistics) { printf("\n*********** TASKPOOL %d *********** \n", tp_count++); printf("Tasks migrated in this TP : %d \n", task_migrated_per_tp); @@ -809,25 +837,26 @@ int get_compute_tasks_executed(int device_index) return device_info[device_index].total_compute_tasks; } - int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status) { int i, data_index; parsec_data_t *original = NULL; parsec_data_copy_t *data_copy = NULL; - parsec_task_t* this_task = gpu_task->ec; + parsec_task_t *this_task = gpu_task->ec; - for( i = 0; i < this_task->task_class->nb_flows; i++ ) + for (i = 0; i < this_task->task_class->nb_flows; i++) { - if( NULL == this_task->data[i].data_in ) continue; - if( NULL == this_task->data[i].source_repo_entry ) continue; + if (NULL == this_task->data[i].data_in) + continue; + if (NULL == this_task->data[i].source_repo_entry) + continue; - if (status == TASK_MIGRATED_BEFORE_STAGE_IN) //data will be trasfered from data_in + if (status == TASK_MIGRATED_BEFORE_STAGE_IN) // data will be trasfered from data_in { original = this_task->data[i].data_in->original; data_copy = this_task->data[i].data_in; } - else //data will be trasfered from data_out + else // data will be trasfered from data_out { original = this_task->data[i].data_out->original; data_copy = this_task->data[i].data_out; @@ -835,7 +864,7 @@ int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status if (original->device_copies[device_index] != NULL && data_copy->version == 
original->device_copies[device_index]->version) - + { /** * If both the both the data copy has the same version, there is no need @@ -846,6 +875,4 @@ int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status } return 0; - } - From 887908e9d076dcdf6b78c85f040b57a61e723d00 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 26 Sep 2022 07:57:25 -0400 Subject: [PATCH 178/215] documentation and function name changes. --- parsec/mca/device/cuda/device_cuda_migrate.c | 49 ++++++++++++++------ parsec/mca/device/cuda/device_cuda_migrate.h | 7 +++ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index a069db711..ceefc046e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -350,15 +350,26 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t return 0; } -int single_pass_selection(parsec_execution_stream_t *es, parsec_list_t *ring, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device) +/** + * @brief Select the victim task for migration.
+ * + * @param es + * @param ring + * @param dealer_device + * @param starving_device + * @return int + */ + +int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, + parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device) { int starving_device_index = -1, dealer_device_index = 0; - int nb_migrated = 0, execution_level = 0; + int execution_level = 0; int deal_success = 0, device_affinity = 0; - int i = 0, j = 0, k = 0, d = 0; + int i = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; - migrated_task_t *mig_task = NULL; + dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); starving_device_index = CUDA_DEVICE_NUM(starving_device->super.device_index); @@ -366,7 +377,7 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_list_t *ring, pa for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { migrated_gpu_task = NULL; - execution_level = select_task_from_device_queues(es, dealer_device, &migrated_gpu_task); + execution_level = single_pass_selection(es, dealer_device, &migrated_gpu_task); if (migrated_gpu_task != NULL) { @@ -425,7 +436,20 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_list_t *ring, pa return deal_success; } -int select_task_from_device_queues(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_gpu_task_t **migrated_gpu_task) +/** + * @brief Select task from the different device queues using a single pass through the + * device queues. We first select the tasks thet were not staged in. If that is not available, + * we select a task that was staged in. The function that does not select a bookkeeping tasks + * and tasks that were already migrated. 
+ * + * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int + */ + +int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t **migrated_gpu_task) { int j = 0; int execution_level = 0; @@ -507,7 +531,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ { int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0; - int deal_success = 0, device_affinity = 0; + int deal_success = 0; int i = 0, j = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; @@ -527,13 +551,10 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ starving_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(starving_device_index)); device_info[dealer_device_index].deal_count++; - deal_success = 0; parsec_list_t *ring = PARSEC_OBJ_NEW(parsec_list_t); PARSEC_OBJ_RETAIN(ring); - - deal_success = single_pass_selection(es, ring, dealer_device, starving_device); - + deal_success = select_tasks(es, ring, dealer_device, starving_device); nb_migrated += deal_success; while (!parsec_list_nolock_is_empty(ring)) @@ -552,8 +573,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) - migrated_gpu_task->candidate[i] = NULL; + for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = migrated_gpu_task->migrate_status; @@ -578,7 +598,6 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ break; } // end for d - migrated_gpu_task = NULL; /* update the expected load on the GPU device */ 
parsec_device_load[dealer_device->super.device_index] -= nb_migrated * parsec_device_sweight[dealer_device->super.device_index]; return nb_migrated; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 6577f4414..2399f295d 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -104,4 +104,11 @@ int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); +int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, + parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device); +int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t **migrated_gpu_task); + + #endif From 62d357c9aad2a475422b5aeb33d0c55b2ff7b487 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 27 Sep 2022 06:01:50 -0400 Subject: [PATCH 179/215] single_pass_selection() implemented. Manager makes a single pass through all of its queues to find a suitable task. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 76 +++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ceefc046e..ca23f8a1e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,4 +1,5 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" +#include "parsec/class/list.h" extern int parsec_device_cuda_enabled; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) @@ -377,6 +378,7 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { migrated_gpu_task = NULL; + //execution_level = single_try_selection(es, dealer_device, &migrated_gpu_task); execution_level = single_pass_selection(es, dealer_device, &migrated_gpu_task); if (migrated_gpu_task != NULL) @@ -448,7 +450,7 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, * @return int */ -int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, +int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_gpu_task_t **migrated_gpu_task) { int j = 0; @@ -518,6 +520,78 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul return execution_level; } +parsec_list_item_t* find_compute_tasks( parsec_list_t* list) +{ + parsec_list_item_t* item = NULL; + parsec_gpu_task_t* task = NULL; + + assert(list != NULL); + + for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + { + task = (parsec_gpu_task_t*) item; + + if ( (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) + break; + } + + if( (item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) + { + 
parsec_list_nolock_remove( list, item); + return item; + } + + return NULL; + +} + +/** + * @brief Select task from the different device queues using a single pass through the + * device queues. We first select the tasks thet were not staged in. If that is not available, + * we select a task that was staged in. The function that does not select a bookkeeping tasks + * and tasks that were already migrated. + * + * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int + */ + +int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t **migrated_gpu_task) +{ + int j = 0; + int execution_level = 0; + + + *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(&(dealer_device->pending)); + execution_level = 0; + + if (*migrated_gpu_task == NULL) + { + // level1 - task is availble in the stage_in queue. Stage_in not started. + *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending); + execution_level = 1; + + if (*migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending); + + if (*migrated_gpu_task != NULL) + { + execution_level = 2; + break; + } + } + } // end of j + } + + return execution_level; +} + /** * @brief check if there are any devices starving. If there are any starving device migrate * task from the dealer device to the starving device. From 4e4e2278ce08fcfb1a68a00690a6723eaa750cad Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 27 Sep 2022 22:37:58 -0400 Subject: [PATCH 180/215] implemented different task selection methods. single_try_selection : Select task from the different device queues using a single try on each device queue. 
If the first try in a queue fails, we move on to the next queue. single_pass_selection : Select task from the different device queues using a single pass through the device queues. two_pass_selection : Select task from the different device queues using two passes through the device queues. The first pass only selects a task with an affinity to the starving device. If the first pass does not yield any tasks, the second pass selects any available compute tasks. --- .../mca/device/cuda/device_cuda_component.c | 6 + parsec/mca/device/cuda/device_cuda_migrate.c | 249 ++++++++++++++---- parsec/mca/device/cuda/device_cuda_migrate.h | 14 +- 3 files changed, 204 insertions(+), 65 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 6de117b92..328bf82f9 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -44,6 +44,7 @@ int parsec_cuda_migrate_tasks = 0; int parsec_migrate_statistics = 0; int parsec_cuda_iterative = 0; int parsec_cuda_migrate_chunk_size = 0; +int parsec_cuda_migrate_task_selection = 0; static int cuda_mask, cuda_nvlink_mask; @@ -213,6 +214,11 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_chunk_size", "Integer to let the GPU know the number of tasks to be migrated in a single go", false, false, 5, &parsec_cuda_migrate_chunk_size); + (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_task_selection", + "Integer to choose the method of task selection during migration", + false, false, 1, &parsec_cuda_migrate_task_selection); + + #if defined(PARSEC_PROF_TRACE) (void)parsec_mca_param_reg_int_name("device_cuda", "one_profiling_stream_per_cuda_stream", diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ca23f8a1e..59cdbea97 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -2,8 +2,9 @@ #include "parsec/class/list.h" extern int parsec_device_cuda_enabled; -extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) extern int parsec_migrate_statistics; +extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) +extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; // list of all migrated task @@ -162,6 +163,14 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per successfull deal : %lf \n", summary_avg_task_migrated_per_sucess); printf("perc of successfull deals : %lf \n", summary_deal_success_perc); } + + if(parsec_cuda_migrate_task_selection == 0) + printf("Task selection : single try \n" ); + else if(parsec_cuda_migrate_task_selection == 2) + printf("Task selection : two pass \n" ); + else + printf("Task selection : single pass \n" ); + printf("\n---------Execution time = %lf ------------ \n", end - start); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -353,24 +362,24 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t /** * @brief Select the victim task for migration. 
- * - * @param es - * @param ring - * @param dealer_device - * @param starving_device - * @return int + * + * @param es + * @param ring + * @param dealer_device + * @param starving_device + * @return int */ -int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, +int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device) + parsec_device_gpu_module_t *starving_device, + int selection_type) { int starving_device_index = -1, dealer_device_index = 0; int execution_level = 0; int deal_success = 0, device_affinity = 0; int i = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; - dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); starving_device_index = CUDA_DEVICE_NUM(starving_device->super.device_index); @@ -378,8 +387,13 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { migrated_gpu_task = NULL; - //execution_level = single_try_selection(es, dealer_device, &migrated_gpu_task); - execution_level = single_pass_selection(es, dealer_device, &migrated_gpu_task); + + if (selection_type == 0) + execution_level = single_try_selection(es, dealer_device, &migrated_gpu_task); + else if (selection_type == 2) + execution_level = two_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); + else //default + execution_level = single_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); if (migrated_gpu_task != NULL) { @@ -439,19 +453,18 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, } /** - * @brief Select task from the different device queues using a single pass through the - * device queues. We first select the tasks thet were not staged in. If that is not available, - * we select a task that was staged in. The function that does not select a bookkeeping tasks - * and tasks that were already migrated. 
- * - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int + * @brief Select task from the different device queues using a single try on each device queue. + * If first try in a queue fails (that is if the first task is not a compute task or a task that + * is already migrated) we move on to the next queue. + * + * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int */ -int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t **migrated_gpu_task) +int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t **migrated_gpu_task) { int j = 0; int execution_level = 0; @@ -463,8 +476,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module * @brief if the task is a not a computational kerenel or if it is a task that has * already been migrated, we stop the migration and push it back to the queue. 
*/ - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN) - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) { parsec_list_push_back(&(dealer_device->pending), *migrated_gpu_task); *migrated_gpu_task = NULL; @@ -480,9 +492,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module if (*migrated_gpu_task != NULL) { - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED) { parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, *migrated_gpu_task); *migrated_gpu_task = NULL; @@ -499,9 +509,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module if (*migrated_gpu_task != NULL) { - // device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED /* || !(device_affinity) */) + if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) { parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, *migrated_gpu_task); *migrated_gpu_task = NULL; @@ -520,57 +528,108 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module return execution_level; } 
-parsec_list_item_t* find_compute_tasks( parsec_list_t* list) +/** + * @brief Find and extract compute tasks for one pass selection. + * + * @param list + * @return parsec_list_item_t* + */ + +parsec_list_item_t* find_compute_tasks_one_pass(parsec_list_t *list) { - parsec_list_item_t* item = NULL; - parsec_gpu_task_t* task = NULL; + parsec_list_item_t *item = NULL; + parsec_gpu_task_t *task = NULL; assert(list != NULL); - for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) { - task = (parsec_gpu_task_t*) item; + task = (parsec_gpu_task_t *)item; - if ( (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) + if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) break; } - if( (item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) + if ((item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) { - parsec_list_nolock_remove( list, item); + parsec_list_nolock_remove(list, item); return item; } return NULL; - } /** - * @brief Select task from the different device queues using a single pass through the - * device queues. We first select the tasks thet were not staged in. If that is not available, - * we select a task that was staged in. The function that does not select a bookkeeping tasks - * and tasks that were already migrated. + * @brief Find and extract compute tasks for two pass selection. 
* - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int + * @param list + * @param starving_device + * @param stage_in_status + * @param pass_count + * @return parsec_list_item_t* + */ + +parsec_list_item_t* find_compute_tasks_two_pass(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, int pass_count) +{ + parsec_list_item_t *item = NULL; + parsec_gpu_task_t *task = NULL; + int device_affinity; + + assert(list != NULL); + + if (pass_count == SECOND_PASS) + { + for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + { + task = (parsec_gpu_task_t *)item; + + if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) + break; + } + } + else if (pass_count == FIRST_PASS) + { + for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + { + task = (parsec_gpu_task_t *)item; + device_affinity = find_task_affinity(task, starving_device->super.device_index, stage_in_status); + + if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED) && (device_affinity > 0)) + break; + } + } + + if ((item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) + { + parsec_list_nolock_remove(list, item); + return item; + } + + return NULL; +} + +/** + * @brief Select task from the different device queues using a single pass through the + * device queues. 
+ * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int */ -int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t **migrated_gpu_task) +int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) { int j = 0; int execution_level = 0; - - *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(&(dealer_device->pending)); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(&(dealer_device->pending)); execution_level = 0; if (*migrated_gpu_task == NULL) { // level1 - task is availble in the stage_in queue. Stage_in not started. - *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(dealer_device->exec_stream[0]->fifo_pending); execution_level = 1; if (*migrated_gpu_task == NULL) @@ -578,7 +637,7 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *) find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending); if (*migrated_gpu_task != NULL) { @@ -592,6 +651,83 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul return execution_level; } +/** + * @brief Select task from the different device queues using a two pass through the + * device queues. The first pass only selects a task with an affinity to the starving + * device. If the first pass does not yield any tasks, the second pass selects any available + * compute tasks. 
+ * + * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int + */ +int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) +{ + int j = 0; + int execution_level = 0; + + // FIRST PASS + + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(&(dealer_device->pending), starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS); + execution_level = 0; + + if (*migrated_gpu_task == NULL) + { + // level1 - task is availble in the stage_in queue. Stage_in not started. + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[0]->fifo_pending, starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS); + execution_level = 1; + + if (*migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, TASK_MIGRATED_AFTER_STAGE_IN, FIRST_PASS); + + if (*migrated_gpu_task != NULL) + { + execution_level = 2; + break; + } + } + } // end of j + } + + // SECOND PASS + + if (*migrated_gpu_task == NULL) + { + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(&(dealer_device->pending), starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS); + execution_level = 0; + + if (*migrated_gpu_task == NULL) + { + // level1 - task is availble in the stage_in queue. Stage_in not started. 
+ *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[0]->fifo_pending, starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS); + execution_level = 1; + + if (*migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, TASK_MIGRATED_AFTER_STAGE_IN, SECOND_PASS); + + if (*migrated_gpu_task != NULL) + { + execution_level = 2; + break; + } + } + } // end of j + } + } + + return execution_level; +} + /** * @brief check if there are any devices starving. If there are any starving device migrate * task from the dealer device to the starving device. @@ -628,7 +764,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ parsec_list_t *ring = PARSEC_OBJ_NEW(parsec_list_t); PARSEC_OBJ_RETAIN(ring); - deal_success = select_tasks(es, ring, dealer_device, starving_device); + deal_success = select_tasks(es, ring, dealer_device, starving_device, parsec_cuda_migrate_task_selection); nb_migrated += deal_success; while (!parsec_list_nolock_is_empty(ring)) @@ -647,7 +783,8 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; + for (k = 0; k < MAX_PARAM_COUNT; k++) + migrated_gpu_task->candidate[i] = NULL; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = migrated_gpu_task->migrate_status; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 2399f295d..67c355ae2 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ 
b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -13,9 +13,12 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) -#define TASK_NOT_MIGRATED 0 +#define TASK_NOT_MIGRATED 0 #define TASK_MIGRATED_BEFORE_STAGE_IN 1 -#define TASK_MIGRATED_AFTER_STAGE_IN 2 +#define TASK_MIGRATED_AFTER_STAGE_IN 2 + +#define FIRST_PASS 1 +#define SECOND_PASS 1 /** * @brief @@ -104,11 +107,4 @@ int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); -int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, - parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device); -int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t **migrated_gpu_task); - - #endif From 62ce7c6e3653757f7476347f6359be8613c30e99 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 29 Sep 2022 04:19:57 -0400 Subject: [PATCH 181/215] code updated. find_compute_tasks_one_pass() and find_compute_tasks_two_pass() replaced by find_compute_tasks(). tracing corrected and other minor code improvements. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 89 +++++---------- parsec/mca/device/cuda/device_cuda_migrate.h | 18 ++- parsec/mca/device/cuda/device_cuda_module.c | 114 +++++++++++-------- parsec/mca/device/device_gpu.h | 3 +- 4 files changed, 118 insertions(+), 106 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 59cdbea97..fc2f5741e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -38,7 +38,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -502,7 +502,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module if (*migrated_gpu_task == NULL) { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { // level2 
- task is available in one of the execution queue stage_in is complete *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 @@ -528,48 +528,8 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module return execution_level; } -/** - * @brief Find and extract compute tasks for one pass selection. - * - * @param list - * @return parsec_list_item_t* - */ - -parsec_list_item_t* find_compute_tasks_one_pass(parsec_list_t *list) -{ - parsec_list_item_t *item = NULL; - parsec_gpu_task_t *task = NULL; - - assert(list != NULL); - - for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) - { - task = (parsec_gpu_task_t *)item; - - if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) - break; - } - - if ((item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) - { - parsec_list_nolock_remove(list, item); - return item; - } - - return NULL; -} - -/** - * @brief Find and extract compute tasks for two pass selection. 
- * - * @param list - * @param starving_device - * @param stage_in_status - * @param pass_count - * @return parsec_list_item_t* - */ - -parsec_list_item_t* find_compute_tasks_two_pass(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, int pass_count) +parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, + int pass_count, int selection_type) { parsec_list_item_t *item = NULL; parsec_gpu_task_t *task = NULL; @@ -577,7 +537,9 @@ parsec_list_item_t* find_compute_tasks_two_pass(parsec_list_t *list, parsec_devi assert(list != NULL); - if (pass_count == SECOND_PASS) + parsec_list_lock(list); + + if ( (pass_count == SECOND_PASS) || (selection_type == SINGLE_PASS_SELECTION) ) { for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) { @@ -587,7 +549,7 @@ parsec_list_item_t* find_compute_tasks_two_pass(parsec_list_t *list, parsec_devi break; } } - else if (pass_count == FIRST_PASS) + else if ( (pass_count == FIRST_PASS)) { for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) { @@ -599,6 +561,8 @@ parsec_list_item_t* find_compute_tasks_two_pass(parsec_list_t *list, parsec_devi } } + parsec_list_unlock(list); + if ((item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) { parsec_list_nolock_remove(list, item); @@ -623,21 +587,24 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul int j = 0; int execution_level = 0; - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(&(dealer_device->pending)); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION); execution_level = 0; if (*migrated_gpu_task == NULL) { // level1 - task is availble in the stage_in queue. 
Stage_in not started. - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(dealer_device->exec_stream[0]->fifo_pending); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION); execution_level = 1; if (*migrated_gpu_task == NULL) { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_one_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, SINGLE_PASS_SELECTION); if (*migrated_gpu_task != NULL) { @@ -670,21 +637,24 @@ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t // FIRST PASS - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(&(dealer_device->pending), starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); execution_level = 0; if (*migrated_gpu_task == NULL) { // level1 - task is availble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[0]->fifo_pending, starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); execution_level = 1; if (*migrated_gpu_task == NULL) { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, TASK_MIGRATED_AFTER_STAGE_IN, FIRST_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); if (*migrated_gpu_task != NULL) { @@ -699,21 +669,24 @@ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t if (*migrated_gpu_task == NULL) { - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(&(dealer_device->pending), starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); execution_level = 0; if (*migrated_gpu_task == NULL) { // level1 - task is availble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[0]->fifo_pending, starving_device, TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); execution_level = 1; if (*migrated_gpu_task == NULL) { - for (j = 0; j < (dealer_device->max_exec_streams - 2); j++) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks_two_pass(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, TASK_MIGRATED_AFTER_STAGE_IN, SECOND_PASS); + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); if (*migrated_gpu_task != NULL) { diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 67c355ae2..8f7e58172 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -17,8 +17,12 @@ #define TASK_MIGRATED_BEFORE_STAGE_IN 1 #define TASK_MIGRATED_AFTER_STAGE_IN 2 +#define SINGLE_TRY_SELECTION 0 +#define SINGLE_PASS_SELECTION 1 +#define TWO_PASS_SELECTION 2 + #define FIRST_PASS 1 -#define SECOND_PASS 1 +#define SECOND_PASS 2 /** * @brief @@ -78,7 +82,8 @@ typedef struct gpu_dev_prof_s double complete_time; double device_index; double task_count; - double waiting_tasks; + double first_waiting_tasks; + double sec_waiting_tasks; double mig_status; double nb_first_stage_in; double nb_sec_stage_in; @@ -107,4 +112,13 @@ int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); int 
find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); +int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); +int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); +int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_gpu_task_t **migrated_gpu_task); +parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, + int pass_count, int selection_type); + #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index bddf3cd4d..32500f18d 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2155,6 +2155,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if( NULL != stream->tasks[stream->end] ) { rc = cudaEventQuery(cuda_stream->events[stream->end]); if( cudaSuccess == rc ) { + /* Save the task for the next step */ task = *out_task = stream->tasks[stream->end]; PARSEC_DEBUG_VERBOSE(19, parsec_gpu_output_stream, @@ -2165,6 +2166,28 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, stream->tasks[stream->end] = NULL; stream->end = (stream->end + 1) % stream->max_events; + #if defined(PARSEC_PROF_TRACE) + // record the end of the events + + assert(*out_task != NULL); + + if(stream == gpu_device->exec_stream[0]) //stage_in stream + { + if( (*out_task)->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) + (*out_task)->first_stage_in_time_end = MPI_Wtime(); + else + (*out_task)->sec_stage_in_time_end = MPI_Wtime(); + } + else if(stream == gpu_device->exec_stream[1]) //stage_out stream + { + (*out_task)->stage_out_time_end = MPI_Wtime(); + } + else 
//execution stream + { + (*out_task)->exec_time_end = MPI_Wtime(); + } + #endif + #if defined(PARSEC_PROF_TRACE) if( stream->prof_event_track_enable ) { if( task->prof_key_end != -1 ) { @@ -2226,19 +2249,8 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, #endif /* defined(PARSEC_DEBUG_PARANOID) */ } -#if defined(PARSEC_PROF_TRACE) - if(task != NULL) - task->exec_time_start = MPI_Wtime(); // start task execution -#endif - rc = progress_fct( gpu_device, task, stream ); -#if defined(PARSEC_PROF_TRACE) - if(task != NULL) - task->exec_time_end = MPI_Wtime(); // start task execution -#endif - - if( 0 > rc ) { if( PARSEC_HOOK_RETURN_AGAIN != rc && PARSEC_HOOK_RETURN_ASYNC != rc ) { @@ -2258,6 +2270,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, } *out_task = NULL; return PARSEC_HOOK_RETURN_DONE; + } /** * Do not skip the cuda event generation. The problem is that some of the inputs @@ -2267,7 +2280,30 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, */ rc = cudaEventRecord( cuda_stream->events[stream->start], cuda_stream->cuda_stream ); assert(cudaSuccess == rc); - stream->tasks[stream->start] = task; + stream->tasks[stream->start] = task; + +#if defined(PARSEC_PROF_TRACE) + // record the start of the events + assert( task != NULL); + + if(stream == gpu_device->exec_stream[0]) //stage_in stream + { + if( task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time + task->first_stage_in_time_start = MPI_Wtime(); + else //second stage_in start time + task->sec_stage_in_time_start = MPI_Wtime(); + } + else if(stream == gpu_device->exec_stream[1]) //stage_out stream + { + task->stage_out_time_start = MPI_Wtime(); + } + else //execution stream + { + task->exec_time_start = MPI_Wtime(); + } + +#endif + stream->start = (stream->start + 1) % stream->max_events; PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]: Submitted %s(task %p) priority %d on stream %s{%p}", @@ -2301,15 +2337,6 @@ parsec_cuda_kernel_push( 
parsec_device_gpu_module_t *gpu_device, char tmp[MAX_TASK_STRLEN]; #endif -#if defined(PARSEC_PROF_TRACE) - - if( gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time - gpu_task->first_stage_in_time_start = MPI_Wtime(); - else //second stage_in start time - gpu_task->sec_stage_in_time_start = MPI_Wtime(); - -#endif - #if 0 if( gpu_task->last_data_check_epoch == gpu_device->data_avail_epoch ) return PARSEC_HOOK_RETURN_AGAIN; @@ -2377,15 +2404,6 @@ parsec_cuda_kernel_push( parsec_device_gpu_module_t *gpu_device, } } -#if defined(PARSEC_PROF_TRACE) - - if( gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in complete time - gpu_task->first_stage_in_time_end = MPI_Wtime(); - else //second stage_in complete time - gpu_task->sec_stage_in_time_end = MPI_Wtime(); - -#endif - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: Push task %s DONE", gpu_device->super.name, @@ -2419,10 +2437,6 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, char tmp[MAX_TASK_STRLEN]; #endif -#if defined(PARSEC_PROF_TRACE) - gpu_task->stage_out_time_start = MPI_Wtime(); -#endif - if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_D2HTRANSFER) { for( i = 0; i < this_task->locals[0].value; i++ ) { gpu_copy = this_task->data[i].data_out; @@ -2569,10 +2583,6 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_unlock(&original->lock); } -#if defined(PARSEC_PROF_TRACE) - gpu_task->stage_out_time_end = MPI_Wtime(); -#endif - release_and_return_error: if( update_data_epoch ) { gpu_device->data_avail_epoch++; @@ -2830,14 +2840,27 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #if defined(PARSEC_PROF_TRACE) if(gpu_task->migrate_status == TASK_NOT_MIGRATED) { - gpu_task->first_queue_time = MPI_Wtime(); - gpu_task->nb_first_stage_in = 0; - gpu_task->nb_sec_stage_in = 0; + gpu_task->first_queue_time = MPI_Wtime(); + gpu_task->select_time = 0; + gpu_task->second_queue_time = 0; + 
gpu_task->exec_time_start = 0; + gpu_task->exec_time_end = 0; + gpu_task->complete_time = 0; + gpu_task->first_stage_in_time_start = 0; + gpu_task->sec_stage_in_time_start = 0; + gpu_task->first_stage_in_time_end = 0; + gpu_task->sec_stage_in_time_end = 0; + gpu_task->stage_out_time_start = 0; + gpu_task->stage_out_time_end = 0; + gpu_task->first_waiting_tasks = gpu_device->mutex - 1; + gpu_task->sec_waiting_tasks = 0; + gpu_task->nb_first_stage_in = 0; + gpu_task->nb_sec_stage_in = 0; } else { gpu_task->second_queue_time = MPI_Wtime(); - gpu_task->waiting_tasks = gpu_device->mutex - 1; + gpu_task->sec_waiting_tasks = gpu_device->mutex - 1; } #endif @@ -3056,9 +3079,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.sec_stage_in_time_start = gpu_task->sec_stage_in_time_start; prof_info.first_stage_in_time_end = gpu_task->first_stage_in_time_end; prof_info.sec_stage_in_time_end = gpu_task->sec_stage_in_time_end; - prof_info.stage_out_time_start = gpu_task->stage_out_time_start; - prof_info.stage_out_time_end = gpu_task->stage_out_time_end; - prof_info.waiting_tasks = gpu_task->waiting_tasks; + prof_info.stage_out_time_start = gpu_task->stage_out_time_start; + prof_info.stage_out_time_end = gpu_task->stage_out_time_end; + prof_info.first_waiting_tasks = gpu_task->first_waiting_tasks; + prof_info.sec_waiting_tasks = gpu_task->sec_waiting_tasks; prof_info.mig_status = gpu_task->migrate_status; prof_info.nb_first_stage_in = gpu_task->nb_first_stage_in; prof_info.nb_sec_stage_in = gpu_task->nb_sec_stage_in; diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index e8e460bc1..d3683cdee 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -106,7 +106,8 @@ struct parsec_gpu_task_s { double stage_out_time_start; double stage_out_time_end; double complete_time; - int32_t waiting_tasks; + int32_t first_waiting_tasks; + int32_t sec_waiting_tasks; int32_t nb_first_stage_in; int32_t nb_sec_stage_in; 
#endif From 8fc3f86f4d07b3a71e05e7e67b19b9679419e91d Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 2 Oct 2022 02:47:45 -0400 Subject: [PATCH 182/215] Event duration of each event in the stream recorded when tracing. --- parsec/mca/device/cuda/device_cuda.h | 5 +- parsec/mca/device/cuda/device_cuda_module.c | 95 +++++++++++++++------ 2 files changed, 75 insertions(+), 25 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda.h b/parsec/mca/device/cuda/device_cuda.h index 507858964..7ebf74413 100644 --- a/parsec/mca/device/cuda/device_cuda.h +++ b/parsec/mca/device/cuda/device_cuda.h @@ -57,7 +57,10 @@ struct parsec_cuda_exec_stream_s { * remains in the system the function is supposed to update it. */ cudaEvent_t *events; - cudaStream_t cuda_stream; + cudaStream_t cuda_stream; +#if defined(PARSEC_PROF_TRACE) + cudaEvent_t *begin_events; +#endif }; /** diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 32500f18d..da84d43b9 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -422,6 +422,16 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) PARSEC_CUDA_CHECK_ERROR( "(INIT) cudaEventCreate ", (cudaError_t)cudastatus, {goto release_device;} ); } + #if defined(PARSEC_PROF_TRACE) + cuda_stream->begin_events = (cudaEvent_t*)malloc(exec_stream->max_events * sizeof(cudaEvent_t)); + for( k = 0; k < exec_stream->max_events; k++ ) { + cuda_stream->begin_events[k] = NULL; + cudastatus = cudaEventCreate(&(cuda_stream->begin_events[k])); + PARSEC_CUDA_CHECK_ERROR( "(INIT) cudaEventCreate ", (cudaError_t)cudastatus, + {goto release_device;} ); + } + #endif + if(j == 0) { len = asprintf(&exec_stream->name, "h2d_cuda(%d)", j); } else if(j == 1) { @@ -536,10 +546,16 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) } free(cuda_stream->events); cuda_stream->events = NULL; } - if( NULL != exec_stream->name ) { - 
free(exec_stream->name); exec_stream->name = NULL; - } #if defined(PARSEC_PROF_TRACE) + if( NULL != cuda_stream->begin_events ) { + for( k = 0; k < exec_stream->max_events; k++ ) { + if( NULL != cuda_stream->begin_events[k] ) { + (void)cudaEventDestroy(cuda_stream->begin_events[k]); + } + } + free(cuda_stream->begin_events); cuda_stream->begin_events = NULL; + } + if( NULL != exec_stream->profiling ) { /* No function to clean the profiling stream. If one is introduced * some day, remember that exec streams 0 and 1 always share the same @@ -590,6 +606,14 @@ parsec_cuda_module_fini(parsec_device_module_t* device) PARSEC_CUDA_CHECK_ERROR( "(parsec_cuda_device_fini) cudaEventDestroy ", status, {continue;} ); } + #if defined(PARSEC_PROF_TRACE) + for( k = 0; k < exec_stream->max_events; k++ ) { + status = cudaEventDestroy(cuda_stream->begin_events[k]); + PARSEC_CUDA_CHECK_ERROR( "(parsec_cuda_device_fini) cudaEventDestroy ", status, + {continue;} ); + } + free(cuda_stream->begin_events); cuda_stream->begin_events = NULL; + #endif exec_stream->max_events = 0; free(cuda_stream->events); cuda_stream->events = NULL; free(exec_stream->tasks); exec_stream->tasks = NULL; @@ -2118,6 +2142,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, { parsec_advance_task_function_t progress_fct; int saved_rc = 0, rc; + float event_duration = 0; #if defined(PARSEC_DEBUG_NOISIER) char task_str[MAX_TASK_STRLEN]; #endif @@ -2163,30 +2188,48 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, gpu_device->super.name, parsec_task_snprintf(task_str, MAX_TASK_STRLEN, task->ec), task->ec->priority, stream->name, (void*)stream); - stream->tasks[stream->end] = NULL; - stream->end = (stream->end + 1) % stream->max_events; - #if defined(PARSEC_PROF_TRACE) - // record the end of the events + #if defined(PARSEC_PROF_TRACE) + assert(*out_task != NULL); + rc = cudaEventQuery(cuda_stream->begin_events[stream->end]); + assert( cudaSuccess == rc ); + rc = cudaEventElapsedTime( 
&event_duration, cuda_stream->begin_events[stream->end], cuda_stream->events[stream->end] ); + assert( cudaSuccess == rc ); + assert( event_duration >=0 ); + + event_duration = event_duration / 1000; // ms to sec to match with MPI_Wtime(); - assert(*out_task != NULL); + if(stream == gpu_device->exec_stream[0]) //stage_in stream + { + if( (*out_task)->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) + (*out_task)->first_stage_in_time_end = (*out_task)->first_stage_in_time_start + event_duration; + else + (*out_task)->sec_stage_in_time_end = (*out_task)->sec_stage_in_time_start + event_duration; - if(stream == gpu_device->exec_stream[0]) //stage_in stream - { - if( (*out_task)->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) - (*out_task)->first_stage_in_time_end = MPI_Wtime(); - else - (*out_task)->sec_stage_in_time_end = MPI_Wtime(); - } - else if(stream == gpu_device->exec_stream[1]) //stage_out stream - { - (*out_task)->stage_out_time_end = MPI_Wtime(); - } - else //execution stream - { - (*out_task)->exec_time_end = MPI_Wtime(); - } - #endif + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]: GPU task %p has been staged_in using stream 1 (event index %d event duration %f)", + gpu_device->super.name, (void*) *out_task, stream->end, event_duration); + } + else if(stream == gpu_device->exec_stream[1]) //stage_out stream + { + (*out_task)->stage_out_time_end = (*out_task)->stage_out_time_start + event_duration; + + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]: GPU task %p has been staged_out using stream 2 (event index %d event duration %f)", + gpu_device->super.name, (void*) *out_task, stream->end, event_duration); + } + else //execution stream + { + (*out_task)->exec_time_end = (*out_task)->exec_time_start + event_duration; + + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "GPU[%s]: GPU task %p has been executed using stream 2 (or above) (event index %d event duration %f)", + gpu_device->super.name, (void*) *out_task, 
stream->end, event_duration); + } + #endif + + stream->tasks[stream->end] = NULL; + stream->end = (stream->end + 1) % stream->max_events; #if defined(PARSEC_PROF_TRACE) if( stream->prof_event_track_enable ) { @@ -2249,6 +2292,10 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, #endif /* defined(PARSEC_DEBUG_PARANOID) */ } +#if defined(PARSEC_PROF_TRACE) + rc = cudaEventRecord( cuda_stream->begin_events[stream->start], cuda_stream->cuda_stream ); + assert(cudaSuccess == rc); +#endif rc = progress_fct( gpu_device, task, stream ); if( 0 > rc ) { From 38355db5fc9255983661ee2874546f1d7265b896 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 2 Oct 2022 02:59:47 -0400 Subject: [PATCH 183/215] code cleanup --- parsec/mca/device/cuda/device_cuda_module.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index da84d43b9..2d607952f 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -843,16 +843,11 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de gpu_copy, gpu_copy->device_private, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, gpu_copy->original, gpu_copy->device_index, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(gpu_copy); - int i, ref_count; if( gpu_copy != NULL) { parsec_warning("parsec_cuda_memory_release_list: Release copy %p original %p readers %d ref_count %d. The copy should have been NULL by this point!! 
(%s:%d)", gpu_copy, gpu_copy->original, gpu_copy->readers, gpu_copy->super.super.obj_reference_count, __FILE__, __LINE__); - //ref_count = gpu_copy->super.super.obj_reference_count; - //for( i = 0; i < ref_count; i++) - // PARSEC_OBJ_RELEASE(gpu_copy); - PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "parsec_cuda_memory_release_list: key_base %d key %d", gpu_copy->original->dc->key_base, gpu_copy->original->key); } @@ -930,7 +925,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, { parsec_task_t *this_task = gpu_task->ec; parsec_gpu_data_copy_t* temp_loc[MAX_PARAM_COUNT], *gpu_elem, *lru_gpu_elem; - parsec_gpu_data_copy_t* old_data; parsec_data_t* master, *oldmaster; const parsec_flow_t *flow; int i, j, data_avail_epoch = 0; @@ -964,7 +958,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, master = this_task->data[i].data_in->original; parsec_atomic_lock(&master->lock); gpu_elem = PARSEC_DATA_GET_COPY(master, gpu_device->super.device_index); - old_data = this_task->data[i].data_out; this_task->data[i].data_out = gpu_elem; /* There is already a copy on the device */ @@ -1275,8 +1268,6 @@ parsec_default_cuda_stage_in(parsec_gpu_task_t *gtask, size_t count; parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t *)gpu_stream; int i; - char tmp[128]; - for(i = 0; i < task->task_class->nb_flows; i++){ if(flow_mask & (1U << i)){ @@ -2296,6 +2287,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, rc = cudaEventRecord( cuda_stream->begin_events[stream->start], cuda_stream->cuda_stream ); assert(cudaSuccess == rc); #endif + rc = progress_fct( gpu_device, task, stream ); if( 0 > rc ) { @@ -2330,7 +2322,6 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, stream->tasks[stream->start] = task; #if defined(PARSEC_PROF_TRACE) - // record the start of the events assert( task != NULL); if(stream == gpu_device->exec_stream[0]) //stage_in stream From 
cc4260e60910f591f2bb43421489b981dc5ffd70 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 3 Oct 2022 20:41:28 -0400 Subject: [PATCH 184/215] tracing updated to count number of H2D and D2D transfer for each task. --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- parsec/mca/device/cuda/device_cuda_migrate.h | 4 ++ parsec/mca/device/cuda/device_cuda_module.c | 59 +++++++++++++------- parsec/mca/device/device_gpu.h | 4 ++ 4 files changed, 47 insertions(+), 22 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index fc2f5741e..08f654650 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -38,7 +38,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};task_type{double}", + 
"first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};task_type{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 8f7e58172..2ab0b3c15 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -87,6 +87,10 @@ typedef struct gpu_dev_prof_s double mig_status; double nb_first_stage_in; double nb_sec_stage_in; + double nb_first_stage_in_d2d; + double nb_first_stage_in_h2d; + double nb_sec_stage_in_d2d; + double nb_sec_stage_in_h2d; double task_type; } gpu_dev_prof_t; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 2d607952f..0fc8fc572 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1559,9 +1559,21 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, #if defined(PARSEC_PROF_TRACE) if (gpu_task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN) + { gpu_task->nb_first_stage_in++; + if(in_elem_dev->super.super.type == PARSEC_DEV_CUDA) + gpu_task->nb_first_stage_in_d2d++; + else + gpu_task->nb_first_stage_in_h2d++; + } else + { gpu_task->nb_sec_stage_in++; + if(in_elem_dev->super.super.type == PARSEC_DEV_CUDA) + gpu_task->nb_sec_stage_in_d2d++; + else + gpu_task->nb_sec_stage_in_h2d++; + } #endif /* If it is already 
under transfer, don't schedule the transfer again. @@ -2286,6 +2298,24 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, #if defined(PARSEC_PROF_TRACE) rc = cudaEventRecord( cuda_stream->begin_events[stream->start], cuda_stream->cuda_stream ); assert(cudaSuccess == rc); + + assert( task != NULL); + + if(stream == gpu_device->exec_stream[0]) //stage_in stream + { + if( task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time + task->first_stage_in_time_start = MPI_Wtime(); + else //second stage_in start time + task->sec_stage_in_time_start = MPI_Wtime(); + } + else if(stream == gpu_device->exec_stream[1]) //stage_out stream + { + task->stage_out_time_start = MPI_Wtime(); + } + else //execution stream + { + task->exec_time_start = MPI_Wtime(); + } #endif rc = progress_fct( gpu_device, task, stream ); @@ -2321,27 +2351,6 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, assert(cudaSuccess == rc); stream->tasks[stream->start] = task; -#if defined(PARSEC_PROF_TRACE) - assert( task != NULL); - - if(stream == gpu_device->exec_stream[0]) //stage_in stream - { - if( task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time - task->first_stage_in_time_start = MPI_Wtime(); - else //second stage_in start time - task->sec_stage_in_time_start = MPI_Wtime(); - } - else if(stream == gpu_device->exec_stream[1]) //stage_out stream - { - task->stage_out_time_start = MPI_Wtime(); - } - else //execution stream - { - task->exec_time_start = MPI_Wtime(); - } - -#endif - stream->start = (stream->start + 1) % stream->max_events; PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]: Submitted %s(task %p) priority %d on stream %s{%p}", @@ -2894,6 +2903,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->sec_waiting_tasks = 0; gpu_task->nb_first_stage_in = 0; gpu_task->nb_sec_stage_in = 0; + gpu_task->nb_first_stage_in_d2d = 0; + gpu_task->nb_first_stage_in_h2d = 0; + 
gpu_task->nb_sec_stage_in_d2d = 0; + gpu_task->nb_sec_stage_in_h2d = 0; } else { @@ -3124,6 +3137,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.mig_status = gpu_task->migrate_status; prof_info.nb_first_stage_in = gpu_task->nb_first_stage_in; prof_info.nb_sec_stage_in = gpu_task->nb_sec_stage_in; + prof_info.nb_first_stage_in_d2d = gpu_task->nb_first_stage_in_d2d; + prof_info.nb_first_stage_in_h2d = gpu_task->nb_first_stage_in_h2d; + prof_info.nb_sec_stage_in_d2d = gpu_task->nb_sec_stage_in_d2d; + prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index d3683cdee..4daffb43f 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -110,6 +110,10 @@ struct parsec_gpu_task_s { int32_t sec_waiting_tasks; int32_t nb_first_stage_in; int32_t nb_sec_stage_in; + int32_t nb_first_stage_in_d2d; + int32_t nb_first_stage_in_h2d; + int32_t nb_sec_stage_in_d2d; + int32_t nb_sec_stage_in_h2d; #endif union { struct { From 468767d3d81fcf20bcc7f9fb867004776d9f67b5 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 5 Oct 2022 00:30:28 -0400 Subject: [PATCH 185/215] trace updated. The clock speed of the device is found just before the task execution is launched on the GPU.
--- parsec/mca/device/cuda/device_cuda_migrate.c | 8 ++++++-- parsec/mca/device/cuda/device_cuda_migrate.h | 1 + parsec/mca/device/cuda/device_cuda_module.c | 10 ++++++++++ parsec/mca/device/device_gpu.h | 1 + 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 08f654650..b0c3ce865 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -38,7 +38,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};task_type{double}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -83,6 
+83,10 @@ int parsec_cuda_migrate_init(int ndevices) gpu_dev_profiling_init(); #endif +#if defined (PARSEC_PROF_TRACE) + nvmlInit_v2(); +#endif + return 0; } @@ -98,7 +102,7 @@ int parsec_cuda_migrate_fini() end = MPI_Wtime(); -#if defined(PARSEC_HAVE_CUDA) +#if defined(PARSEC_PROF_TRACE) nvmlShutdown(); #endif diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 2ab0b3c15..8bc9a91a4 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -91,6 +91,7 @@ typedef struct gpu_dev_prof_s double nb_first_stage_in_h2d; double nb_sec_stage_in_d2d; double nb_sec_stage_in_h2d; + double clock_speed; double task_type; } gpu_dev_prof_t; diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 0fc8fc572..ec8b58088 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2315,6 +2315,14 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, else //execution stream { task->exec_time_start = MPI_Wtime(); + + unsigned int clock = 0; + nvmlDevice_t dev; + rc = nvmlDeviceGetHandleByIndex_v2( CUDA_DEVICE_NUM(gpu_device->super.device_index), &dev ); + assert( NVML_SUCCESS == rc ); + rc = nvmlDeviceGetClockInfo( dev , NVML_CLOCK_SM, &clock ); + assert( NVML_SUCCESS == rc ); + task->clock_speed = clock; } #endif @@ -2907,6 +2915,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->nb_first_stage_in_h2d = 0; gpu_task->nb_sec_stage_in_d2d = 0; gpu_task->nb_sec_stage_in_h2d = 0; + gpu_task->clock_speed = 0; } else { @@ -3141,6 +3150,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.nb_first_stage_in_h2d = gpu_task->nb_first_stage_in_h2d; prof_info.nb_sec_stage_in_d2d = gpu_task->nb_sec_stage_in_d2d; prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; + prof_info.clock_speed = gpu_task->clock_speed; 
parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 4daffb43f..8da9baa17 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -114,6 +114,7 @@ struct parsec_gpu_task_s { int32_t nb_first_stage_in_h2d; int32_t nb_sec_stage_in_d2d; int32_t nb_sec_stage_in_h2d; + int32_t clock_speed; #endif union { struct { From c7bcff1ddbf5a8f9010eecda5e0e5bd965da9c52 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 10 Oct 2022 22:40:12 -0400 Subject: [PATCH 186/215] Trace updated to collect time stamp in ns --- parsec/mca/device/cuda/device_cuda_migrate.c | 53 +++++++++++--------- parsec/mca/device/cuda/device_cuda_migrate.h | 4 +- parsec/mca/device/cuda/device_cuda_module.c | 36 ++++++++----- 3 files changed, 56 insertions(+), 37 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b0c3ce865..8863763d9 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,4 +1,5 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" +#include "parsec/include/parsec/os-spec-timing.h" #include "parsec/class/list.h" extern int parsec_device_cuda_enabled; @@ -13,8 +14,7 @@ static parsec_hash_table_t *task_mapping_ht = NULL; // hashtable for storing tas static int task_migrated_per_tp = 0; static int tp_count; -double start = 0; -double end = 0; +static parsec_time_t start; PARSEC_OBJ_CLASS_INSTANCE(migrated_task_t, parsec_list_item_t, NULL, NULL); @@ -38,7 +38,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), - 
"first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double};class_id{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -53,7 +53,8 @@ static void gpu_dev_profiling_init() int parsec_cuda_migrate_init(int ndevices) { int i, j; - start = MPI_Wtime(); + + start = take_time(); NDEVICES = ndevices; device_info = (parsec_device_cuda_info_t *)calloc(ndevices, sizeof(parsec_device_cuda_info_t)); @@ -100,8 +101,6 @@ int parsec_cuda_migrate_fini() int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; - end = MPI_Wtime(); - #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); #endif @@ -175,16 +174,18 @@ int 
parsec_cuda_migrate_fini() else printf("Task selection : single pass \n" ); - printf("\n---------Execution time = %lf ------------ \n", end - start); + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double) time_stamp() / 1000000000); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); return 0; } -double current_time() +uint64_t time_stamp() { - return (MPI_Wtime() - start); + parsec_time_t now; + now = take_time(); + return diff_time(start, now); } /** @@ -246,7 +247,7 @@ int is_starving(int device) * starvtion if the number of ready tasks available is less than twice the * number of execution stream. */ - parsec_device_gpu_module_t *d = parsec_mca_device_get(DEVICE_NUM(device)); + parsec_device_gpu_module_t *d = (parsec_device_gpu_module_t *) parsec_mca_device_get(DEVICE_NUM(device)); return (d->mutex < 5) ? 1 : 0; // return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; @@ -313,7 +314,6 @@ int find_starving_device(int dealer_device) int parsec_cuda_mig_task_dequeue(parsec_execution_stream_t *es) { - char tmp[128]; migrated_task_t *mig_task = NULL; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *dealer_device = NULL; @@ -379,14 +379,13 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, parsec_device_gpu_module_t *starving_device, int selection_type) { - int starving_device_index = -1, dealer_device_index = 0; + int dealer_device_index = 0; int execution_level = 0; int deal_success = 0, device_affinity = 0; int i = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - starving_device_index = CUDA_DEVICE_NUM(starving_device->super.device_index); for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) { @@ -449,7 +448,7 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, else migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; - parsec_list_push_front(ring, 
migrated_gpu_task); + parsec_list_push_front(ring, (parsec_list_item_t *) migrated_gpu_task); } } // end for i @@ -470,6 +469,7 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_gpu_task_t **migrated_gpu_task) { + (void)es; int j = 0; int execution_level = 0; *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(&(dealer_device->pending)); // level 0 @@ -482,7 +482,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module */ if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) { - parsec_list_push_back(&(dealer_device->pending), *migrated_gpu_task); + parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *) *migrated_gpu_task); *migrated_gpu_task = NULL; } @@ -498,7 +498,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module { if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED) { - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, *migrated_gpu_task); + parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *) *migrated_gpu_task); *migrated_gpu_task = NULL; } } @@ -515,7 +515,7 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module { if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) { - parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, *migrated_gpu_task); + parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, (parsec_list_item_t *) *migrated_gpu_task); *migrated_gpu_task = NULL; } } @@ -619,6 +619,7 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul } // end of j } + (void)es; 
return execution_level; } @@ -702,6 +703,8 @@ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t } } + (void )es; + return execution_level; } @@ -719,7 +722,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ int starving_device_index = -1, dealer_device_index = 0; int nb_migrated = 0, execution_level = 0; int deal_success = 0; - int i = 0, j = 0, k = 0, d = 0; + int i = 0, k = 0, d = 0; parsec_gpu_task_t *migrated_gpu_task = NULL; parsec_device_gpu_module_t *starving_device = NULL; migrated_task_t *mig_task = NULL; @@ -746,7 +749,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ while (!parsec_list_nolock_is_empty(ring)) { - migrated_gpu_task = parsec_list_pop_front(ring); + migrated_gpu_task = (parsec_gpu_task_t *) parsec_list_pop_front(ring); assert(migrated_gpu_task != NULL); /** @@ -766,7 +769,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ mig_task->starving_device = starving_device; mig_task->stage_in_status = migrated_gpu_task->migrate_status; #if defined(PARSEC_PROF_TRACE) - migrated_gpu_task->select_time = MPI_Wtime(); + migrated_gpu_task->select_time = time_stamp(); #endif PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); @@ -806,9 +809,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { int i = 0; parsec_task_t *task = gpu_task->ec; - parsec_data_copy_t *src_copy = NULL; - char tmp[128]; - + /** * Data is already staged in the dealer device and we can find all the data * of the tasks to be migrated in the dealer device. 
@@ -983,9 +984,13 @@ int update_task_to_device_mapping(parsec_task_t *task, int device_index) parsec_hash_table_lock_bucket(task_mapping_ht, key); parsec_hash_table_nolock_insert(task_mapping_ht, &item->ht_item); parsec_hash_table_unlock_bucket(task_mapping_ht, key); + + return 1; } else item->ht_item.key = key; + + return 0; } /** @@ -1046,7 +1051,7 @@ int get_compute_tasks_executed(int device_index) int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status) { - int i, data_index; + int i; parsec_data_t *original = NULL; parsec_data_copy_t *data_copy = NULL; parsec_task_t *this_task = gpu_task->ec; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 8bc9a91a4..9d03ca754 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -93,6 +93,7 @@ typedef struct gpu_dev_prof_s double nb_sec_stage_in_h2d; double clock_speed; double task_type; + double class_id; } gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); @@ -108,7 +109,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, int stage_in_status); int gpu_data_version_increment(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t *gpu_device); -double current_time(); +uint64_t time_stamp(); int update_task_to_device_mapping(parsec_task_t *task, int device_index); int find_task_to_device_mapping(parsec_task_t *task); void clear_task_migrated_per_tp(); @@ -116,6 +117,7 @@ void print_task_migrated_per_tp(); int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); +int get_compute_tasks_executed(int device_index); int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); int 
single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index ec8b58088..3f76c03d7 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2198,10 +2198,18 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, assert( cudaSuccess == rc ); rc = cudaEventElapsedTime( &event_duration, cuda_stream->begin_events[stream->end], cuda_stream->events[stream->end] ); assert( cudaSuccess == rc ); - assert( event_duration >=0 ); - - event_duration = event_duration / 1000; // ms to sec to match with MPI_Wtime(); + assert( event_duration >=0 ); + /** + * cudaEventQuery() return time in milli sec, with a resolution of .5 micro sec. + * time_stamp() return time in nano sec. + * So convert ms to ns. + */ + event_duration = event_duration * 1000000; + /** + * @brief Updated the event end time based on the event start time + * and the elasped time after the start of the event. 
+ */ if(stream == gpu_device->exec_stream[0]) //stage_in stream { if( (*out_task)->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) @@ -2304,17 +2312,17 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if(stream == gpu_device->exec_stream[0]) //stage_in stream { if( task->migrate_status < TASK_MIGRATED_AFTER_STAGE_IN ) //first stage_in start time - task->first_stage_in_time_start = MPI_Wtime(); + task->first_stage_in_time_start = time_stamp(); else //second stage_in start time - task->sec_stage_in_time_start = MPI_Wtime(); + task->sec_stage_in_time_start = time_stamp(); } else if(stream == gpu_device->exec_stream[1]) //stage_out stream { - task->stage_out_time_start = MPI_Wtime(); + task->stage_out_time_start = time_stamp(); } else //execution stream { - task->exec_time_start = MPI_Wtime(); + task->exec_time_start = time_stamp(); unsigned int clock = 0; nvmlDevice_t dev; @@ -2895,7 +2903,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #if defined(PARSEC_PROF_TRACE) if(gpu_task->migrate_status == TASK_NOT_MIGRATED) { - gpu_task->first_queue_time = MPI_Wtime(); + gpu_task->first_queue_time = time_stamp(); gpu_task->select_time = 0; gpu_task->second_queue_time = 0; gpu_task->exec_time_start = 0; @@ -2919,7 +2927,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } else { - gpu_task->second_queue_time = MPI_Wtime(); + gpu_task->second_queue_time = time_stamp(); gpu_task->sec_waiting_tasks = gpu_device->mutex - 1; } #endif @@ -3105,7 +3113,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #if defined(PARSEC_PROF_TRACE) if(gpu_task != NULL) - gpu_task->complete_time = MPI_Wtime(); + gpu_task->complete_time = time_stamp(); #endif /** @@ -3123,7 +3131,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); } - #if defined(PARSEC_PROF_TRACE) +#if defined(PARSEC_PROF_TRACE) + if( gpu_task != NULL ) + { gpu_dev_prof_t prof_info; prof_info.task_type = 
gpu_task->task_type; @@ -3151,12 +3161,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.nb_sec_stage_in_d2d = gpu_task->nb_sec_stage_in_d2d; prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; prof_info.clock_speed = gpu_task->clock_speed; + prof_info.class_id = gpu_task->ec->task_class->task_class_id; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); - #endif + } +#endif if (parsec_migrate_statistics) { From d360d02bf711f3ba4bc5c88b4a61a2f633b37524 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 12 Oct 2022 20:39:48 -0400 Subject: [PATCH 187/215] New selection policy 'affinity-only' added. This policy makes sure that only tasks with an affinity to the starving device are migrated. --- parsec/mca/device/cuda/device_cuda_migrate.c | 66 +++++++++++++++++--- parsec/mca/device/cuda/device_cuda_migrate.h | 10 ++- parsec/mca/device/cuda/device_cuda_module.c | 5 ++ 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 8863763d9..1faca9630 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -38,7 +38,7 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", "fill:#FF0000", sizeof(gpu_dev_prof_t), -
"first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double};class_id{double}", + "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double};class_id{double};exec_stream_index{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -168,11 +168,13 @@ int parsec_cuda_migrate_fini() } if(parsec_cuda_migrate_task_selection == 0) - printf("Task selection : single try \n" ); + printf("Task selection : single-try \n" ); + else if(parsec_cuda_migrate_task_selection == 1) + printf("Task selection : single-pass \n" ); else if(parsec_cuda_migrate_task_selection == 2) - printf("Task selection : two pass \n" ); + printf("Task selection : two-pass \n" ); else - printf("Task selection : single pass \n" ); + printf("Task selection : affinity-only \n" ); printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", 
time_stamp(), (double) time_stamp() / 1000000000); PARSEC_OBJ_RELEASE(migrated_task_list); @@ -393,10 +395,12 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, if (selection_type == 0) execution_level = single_try_selection(es, dealer_device, &migrated_gpu_task); + else if (selection_type == 1) //default + execution_level = single_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); else if (selection_type == 2) execution_level = two_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); - else //default - execution_level = single_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); + else if (selection_type == 3) + execution_level = affinity_only_selection(es, dealer_device, starving_device, &migrated_gpu_task); if (migrated_gpu_task != NULL) { @@ -553,7 +557,7 @@ parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_mo break; } } - else if ( (pass_count == FIRST_PASS)) + else if ( (pass_count == FIRST_PASS) || (selection_type == AFFINITY_ONLY_SELECTION) ) { for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) { @@ -708,6 +712,54 @@ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t return execution_level; } + +/** + * @brief Select task from the different device queues using a single pass through the + * device queues. 
+ * @param es + * @param dealer_device + * @param migrated_gpu_task + * @return int + */ + +int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) +{ + int j = 0; + int execution_level = 0; + + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); + execution_level = 0; + + if (*migrated_gpu_task == NULL) + { + // level1 - task is availble in the stage_in queue. Stage_in not started. + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); + execution_level = 1; + + if (*migrated_gpu_task == NULL) + { + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + { + // level2 - task is available in one of the execution queue stage_in is complete + *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); + + if (*migrated_gpu_task != NULL) + { + execution_level = 2; + break; + } + } + } // end of j + } + + (void)es; + return execution_level; +} + /** * @brief check if there are any devices starving. If there are any starving device migrate * task from the dealer device to the starving device. 
diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 9d03ca754..57ea40e94 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -17,9 +17,10 @@ #define TASK_MIGRATED_BEFORE_STAGE_IN 1 #define TASK_MIGRATED_AFTER_STAGE_IN 2 -#define SINGLE_TRY_SELECTION 0 -#define SINGLE_PASS_SELECTION 1 -#define TWO_PASS_SELECTION 2 +#define SINGLE_TRY_SELECTION 0 +#define SINGLE_PASS_SELECTION 1 +#define TWO_PASS_SELECTION 2 +#define AFFINITY_ONLY_SELECTION 3 #define FIRST_PASS 1 #define SECOND_PASS 2 @@ -94,6 +95,7 @@ typedef struct gpu_dev_prof_s double clock_speed; double task_type; double class_id; + double exec_stream_index; } gpu_dev_prof_t; int parsec_cuda_migrate_init(int ndevices); @@ -125,6 +127,8 @@ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_gpu_task_t **migrated_gpu_task); +int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, int pass_count, int selection_type); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3f76c03d7..64e27f67f 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -3000,6 +3000,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->ec->priority ); } +#if defined(PARSEC_PROF_TRACE) + if(gpu_task != NULL) + gpu_task->exec_stream_index = 2+exec_stream; +#endif rc = progress_stream( gpu_device, 
gpu_device->exec_stream[2+exec_stream], NULL, @@ -3162,6 +3166,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; prof_info.clock_speed = gpu_task->clock_speed; prof_info.class_id = gpu_task->ec->task_class->task_class_id; + prof_info.exec_stream_index = gpu_task->exec_stream_index; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, From 2d9b6e8e9a19625e5d58cdc75ad1d998948b0430 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 12 Oct 2022 22:40:23 -0400 Subject: [PATCH 188/215] New mca parameter added to decide whether a task completion should be carried out by the GPU manager thread or some other worker thread. --- parsec/mca/device/cuda/device_cuda_component.c | 14 +++++++++----- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++++++ parsec/mca/device/cuda/device_cuda_module.c | 12 +++++++++++- parsec/mca/device/device_gpu.h | 1 + parsec/scheduling.c | 11 ++++------- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 328bf82f9..472dfaafb 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -40,11 +40,12 @@ int parsec_device_cuda_enabled_index, parsec_device_cuda_enabled; int parsec_cuda_sort_pending = 0, parsec_cuda_max_streams = PARSEC_GPU_MAX_STREAMS; int parsec_cuda_memory_block_size, parsec_cuda_memory_percentage, parsec_cuda_memory_number_of_blocks; char* parsec_cuda_lib_path = NULL; -int parsec_cuda_migrate_tasks = 0; -int parsec_migrate_statistics = 0; -int parsec_cuda_iterative = 0; -int parsec_cuda_migrate_chunk_size = 0; -int parsec_cuda_migrate_task_selection = 0; +int parsec_cuda_migrate_tasks = 0; +int parsec_migrate_statistics = 0; +int parsec_cuda_iterative = 0; +int parsec_cuda_migrate_chunk_size = 0; +int parsec_cuda_migrate_task_selection = 0; +int 
parsec_cuda_delegate_task_completion = 0; static int cuda_mask, cuda_nvlink_mask; @@ -217,6 +218,9 @@ static int device_cuda_component_register(void) (void)parsec_mca_param_reg_int_name("device_cuda", "migrate_task_selection", "Integer to choose the method of task selection during migration", false, false, 1, &parsec_cuda_migrate_task_selection); + (void)parsec_mca_param_reg_int_name("device_cuda", "delegate_task_completion", + "Integer to choose the whether task completion should be done by a manager thread (default is yes)", + false, false, 1, &parsec_cuda_delegate_task_completion); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 1faca9630..0b7c7f15b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -6,6 +6,7 @@ extern int parsec_device_cuda_enabled; extern int parsec_migrate_statistics; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) +extern int parsec_cuda_delegate_task_completion; // task completion delegation parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; // list of all migrated task @@ -176,6 +177,11 @@ int parsec_cuda_migrate_fini() else printf("Task selection : affinity-only \n" ); + if(parsec_cuda_delegate_task_completion == 0) + printf("Task completion : not delegated\n"); + else + printf("Task completion : delegated\n"); + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double) time_stamp() / 1000000000); PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 64e27f67f..58ba9e764 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -55,6 
+55,7 @@ extern int parsec_gpu_task_count_start; extern int parsec_gpu_task_count_end; extern int parsec_cuda_migrate_tasks; extern int parsec_migrate_statistics; +extern int parsec_cuda_delegate_task_completion; /* look up how many FMA per cycle in single/double, per cuda MP * precision. @@ -3113,7 +3114,16 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_cuda_kernel_epilog( gpu_device, gpu_task ); gpu_device->super.executed_tasks++; - __parsec_complete_execution( es, gpu_task->ec ); + + if( parsec_cuda_delegate_task_completion == 0 ) + __parsec_complete_execution( es, gpu_task->ec ); + else + { + gpu_task->ec->priority = INT32_MAX; /* Assign maximum priority */ + gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE; + PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec); + __parsec_schedule(es, (parsec_task_t *)gpu_task->ec, 0); + } #if defined(PARSEC_PROF_TRACE) if(gpu_task != NULL) diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 8da9baa17..14345c1b7 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -115,6 +115,7 @@ struct parsec_gpu_task_s { int32_t nb_sec_stage_in_d2d; int32_t nb_sec_stage_in_h2d; int32_t clock_speed; + int32_t exec_stream_index; #endif union { struct { diff --git a/parsec/scheduling.c b/parsec/scheduling.c index 57b4da22a..88d3150b2 100644 --- a/parsec/scheduling.c +++ b/parsec/scheduling.c @@ -432,18 +432,15 @@ int __parsec_task_progress( parsec_execution_stream_t* es, PARSEC_PINS(es, PREPARE_INPUT_END, task); } - //A special case is added to deal with task completion of the GPU tasks - if( task->status == PARSEC_TASK_STATUS_COMPLETE) - { - __parsec_complete_execution( es, task ); - return PARSEC_HOOK_RETURN_DONE; - } - switch(rc) { case PARSEC_HOOK_RETURN_DONE: { if(task->status <= PARSEC_TASK_STATUS_HOOK) { rc = __parsec_execute( es, task ); } + else if(task->status == PARSEC_TASK_STATUS_COMPLETE) { // To deal with task completion of the GPU tasks + rc = 
PARSEC_HOOK_RETURN_DONE; + } + /* We're good to go ... */ switch(rc) { case PARSEC_HOOK_RETURN_DONE: /* This execution succeeded */ From f314734e4904dfb4541331f011032ca2b8a6f0f7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 24 Oct 2022 21:21:25 -0400 Subject: [PATCH 189/215] Statistics collected on the number of data evictions. Some other minor changes. --- parsec/mca/device/cuda/device_cuda_migrate.c | 46 +++++++++++++------- parsec/mca/device/cuda/device_cuda_migrate.h | 2 + parsec/mca/device/cuda/device_cuda_module.c | 20 ++++++--- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 0b7c7f15b..7a07978cb 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -101,6 +101,7 @@ int parsec_cuda_migrate_fini() int summary_total_tasks_migrated = 0, summary_total_l0_tasks_migrated = 0, summary_total_l1_tasks_migrated = 0, summary_total_l2_tasks_migrated = 0; int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; + int summary_total_evictions = 0; #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); @@ -123,6 +124,7 @@ int parsec_cuda_migrate_fini() avg_task_migrated = ((float)tot_task_migrated) / ((float)device_info[i].deal_count); deal_success_perc = (((float)device_info[i].success_count) / ((float)device_info[i].deal_count)) * 100; avg_task_migrated_per_sucess = ((float)tot_task_migrated) / ((float)device_info[i].success_count); + summary_total_evictions += device_info[i].evictions; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); @@ -145,6 +147,7 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per deal : %lf \n", avg_task_migrated); printf("Avg task migrated per successfull deal : 
%lf \n", avg_task_migrated_per_sucess); printf("Perc of successfull deals : %lf \n", deal_success_perc); + printf("Evictions : %d \n", device_info[i].evictions); } printf("\n *********** SUMMARY *********** \n"); @@ -166,23 +169,26 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per deal : %lf \n", summary_avg_task_migrated); printf("Avg task migrated per successfull deal : %lf \n", summary_avg_task_migrated_per_sucess); printf("perc of successfull deals : %lf \n", summary_deal_success_perc); - } + printf("Total evictions : %d \n", summary_total_evictions); - if(parsec_cuda_migrate_task_selection == 0) + if(parsec_cuda_migrate_task_selection == 0) printf("Task selection : single-try \n" ); - else if(parsec_cuda_migrate_task_selection == 1) - printf("Task selection : single-pass \n" ); - else if(parsec_cuda_migrate_task_selection == 2) - printf("Task selection : two-pass \n" ); - else - printf("Task selection : affinity-only \n" ); - - if(parsec_cuda_delegate_task_completion == 0) - printf("Task completion : not delegated\n"); - else - printf("Task completion : delegated\n"); + else if(parsec_cuda_migrate_task_selection == 1) + printf("Task selection : single-pass \n" ); + else if(parsec_cuda_migrate_task_selection == 2) + printf("Task selection : two-pass \n" ); + else + printf("Task selection : affinity-only \n" ); + + if(parsec_cuda_delegate_task_completion == 0) + printf("Task completion : not delegated\n"); + else + printf("Task completion : delegated\n"); + + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double) time_stamp() / 1000000000); + } - printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double) time_stamp() / 1000000000); + PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -241,6 +247,12 @@ int parsec_cuda_tasks_executed(int device) return rc + 1; } +int parsec_cuda_inc_eviction_count(int device) +{ + int rc = 
parsec_atomic_fetch_add_int32(&(device_info[device].evictions), 1); + return rc + 1; +} + /** * @brief returns 1 if the device is starving, 0 if its is not * @@ -909,7 +921,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || + task->data[i].data_out->version > task->data[i].data_in->version) { task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); @@ -942,7 +955,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version) + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || + task->data[i].data_out->version > task->data[i].data_in->version) { task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 57ea40e94..7aa11579f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -49,6 +49,7 @@ typedef struct parsec_device_cuda_info_s int success_count; int ready_compute_tasks; int total_compute_tasks; + int evictions; int affinity_count; } 
parsec_device_cuda_info_t; @@ -131,5 +132,6 @@ int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_mod parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, int pass_count, int selection_type); +int parsec_cuda_inc_eviction_count(int device_index); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 58ba9e764..3f4d03f20 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -602,7 +602,7 @@ parsec_cuda_module_fini(parsec_device_module_t* device) exec_stream->end = 0; for( k = 0; k < exec_stream->max_events; k++ ) { - assert( NULL == exec_stream->tasks[k] ); + //assert( NULL == exec_stream->tasks[k] ); status = cudaEventDestroy(cuda_stream->events[k]); PARSEC_CUDA_CHECK_ERROR( "(parsec_cuda_device_fini) cudaEventDestroy ", status, {continue;} ); @@ -996,6 +996,10 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, if( NULL == gpu_elem->device_private ) { #endif find_another_data: + + if (parsec_migrate_statistics) + parsec_cuda_inc_eviction_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + /* Look for a data_copy to free */ lru_gpu_elem = (parsec_gpu_data_copy_t*)parsec_list_pop_front(&gpu_device->gpu_mem_lru); if( NULL == lru_gpu_elem ) { @@ -1405,8 +1409,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, */ parsec_data_copy_t *candidate = gpu_task->candidate[flow->flow_index]; parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(candidate->device_index); - assert(PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate 
(case 2) to Device to Device copy", gpu_device->super.name, candidate, candidate->readers, candidate->super.super.obj_reference_count, target->cuda_index); @@ -1421,7 +1425,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( /* Check for NULL is important. Otherwise, in cases where the flow is a NEW on GPU it will cause problems */ gpu_task->original_data_in[ flow->flow_index ] != NULL && gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - PARSEC_OBJ_RELEASE(task_data->data_in); + { + PARSEC_OBJ_RELEASE(task_data->data_in); + } /** * if the data was already staged_in then we would have already incremented @@ -1454,8 +1460,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); // Remember the original data_in. - if( gpu_task->original_data_in[ flow->flow_index ] == NULL) - gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + assert( gpu_task->original_data_in[ flow->flow_index ] == NULL); + gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; goto src_selected; } @@ -1489,8 +1495,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); // Remember the original data_in. - if( gpu_task->original_data_in[ flow->flow_index ] == NULL) - gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; + assert( gpu_task->original_data_in[ flow->flow_index ] == NULL); + gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; PARSEC_DATA_COPY_INC_READERS_ATOMIC(candidate); undo_readers_inc_if_no_transfer = 1; From a073f2c58bc2c71dfd908d3e28fa3348d6def2dd Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 25 Oct 2022 06:36:46 -0400 Subject: [PATCH 190/215] selection policy updated to use the queues better. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 567 ++++++++++--------- parsec/mca/device/cuda/device_cuda_migrate.h | 28 +- 2 files changed, 306 insertions(+), 289 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 7a07978cb..b9c135cdd 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,8 +4,8 @@ extern int parsec_device_cuda_enabled; extern int parsec_migrate_statistics; -extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) -extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) +extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) +extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) extern int parsec_cuda_delegate_task_completion; // task completion delegation parsec_device_cuda_info_t *device_info; @@ -85,7 +85,7 @@ int parsec_cuda_migrate_init(int ndevices) gpu_dev_profiling_init(); #endif -#if defined (PARSEC_PROF_TRACE) +#if defined(PARSEC_PROF_TRACE) nvmlInit_v2(); #endif @@ -171,24 +171,23 @@ int parsec_cuda_migrate_fini() printf("perc of successfull deals : %lf \n", summary_deal_success_perc); printf("Total evictions : %d \n", summary_total_evictions); - if(parsec_cuda_migrate_task_selection == 0) - printf("Task selection : single-try \n" ); - else if(parsec_cuda_migrate_task_selection == 1) - printf("Task selection : single-pass \n" ); - else if(parsec_cuda_migrate_task_selection == 2) - printf("Task selection : two-pass \n" ); + if (parsec_cuda_migrate_task_selection == 0) + printf("Task selection : single-try \n"); + else if (parsec_cuda_migrate_task_selection == 1) + printf("Task selection : single-pass \n"); + else if (parsec_cuda_migrate_task_selection == 2) + printf("Task selection : two-pass \n"); else - printf("Task 
selection : affinity-only \n" ); + printf("Task selection : affinity-only \n"); - if(parsec_cuda_delegate_task_completion == 0) + if (parsec_cuda_delegate_task_completion == 0) printf("Task completion : not delegated\n"); else printf("Task completion : delegated\n"); - printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double) time_stamp() / 1000000000); + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double)time_stamp() / 1000000000); } - PARSEC_OBJ_RELEASE(migrated_task_list); free(device_info); @@ -267,7 +266,7 @@ int is_starving(int device) * starvtion if the number of ready tasks available is less than twice the * number of execution stream. */ - parsec_device_gpu_module_t *d = (parsec_device_gpu_module_t *) parsec_mca_device_get(DEVICE_NUM(device)); + parsec_device_gpu_module_t *d = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(device)); return (d->mutex < 5) ? 1 : 0; // return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; @@ -384,398 +383,415 @@ int parsec_cuda_mig_task_enqueue(parsec_execution_stream_t *es, migrated_task_t return 0; } -/** - * @brief Select the victim task for migration. 
- * - * @param es - * @param ring - * @param dealer_device - * @param starving_device - * @return int - */ - -int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, - parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, - int selection_type) +int set_migrate_status(parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, + parsec_gpu_task_t *migrated_gpu_task, int execution_level) { - int dealer_device_index = 0; - int execution_level = 0; - int deal_success = 0, device_affinity = 0; - int i = 0; - parsec_gpu_task_t *migrated_gpu_task = NULL; + int dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); + int device_affinity = 0; - dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); + /** + * @brief change migrate_status according to the status of the stage in of the + * stage_in data. + */ + if (execution_level == 2) + migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; + else + migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; + + // keep track of compute task count. Decrement compute task count. 
+ dec_compute_task_count(dealer_device_index); - for (i = 0; i < parsec_cuda_migrate_chunk_size; i++) + if (parsec_migrate_statistics) { - migrated_gpu_task = NULL; - - if (selection_type == 0) - execution_level = single_try_selection(es, dealer_device, &migrated_gpu_task); - else if (selection_type == 1) //default - execution_level = single_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); - else if (selection_type == 2) - execution_level = two_pass_selection(es, dealer_device, starving_device, &migrated_gpu_task); - else if (selection_type == 3) - execution_level = affinity_only_selection(es, dealer_device, starving_device, &migrated_gpu_task); - - if (migrated_gpu_task != NULL) + if (execution_level == 0) { - assert(migrated_gpu_task->ec != NULL); - PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)migrated_gpu_task); - - // keep track of compute task count. Decrement compute task count. - dec_compute_task_count(dealer_device_index); - - if (parsec_migrate_statistics) - { - if (execution_level == 0) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); - device_info[dealer_device_index].level0++; - } - if (execution_level == 1) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); - device_info[dealer_device_index].level1++; - } - if (execution_level == 2) - { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); - device_info[dealer_device_index].level2++; - } - } - - deal_success++; - parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); - - if (parsec_migrate_statistics) - { - if (execution_level == 2) - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); - else - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - - if (device_affinity) - device_info[dealer_device_index].affinity_count++; - } - - /** 
- * @brief change migrate_status according to the status of the stage in of the - * stage_in data. - */ - if (execution_level == 2) - migrated_gpu_task->migrate_status = TASK_MIGRATED_AFTER_STAGE_IN; - else - migrated_gpu_task->migrate_status = TASK_MIGRATED_BEFORE_STAGE_IN; - - parsec_list_push_front(ring, (parsec_list_item_t *) migrated_gpu_task); + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); + device_info[dealer_device_index].level0++; + } + if (execution_level == 1) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); + device_info[dealer_device_index].level1++; + } + if (execution_level == 2) + { + parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); + device_info[dealer_device_index].level2++; } - } // end for i + } - return deal_success; + parsec_atomic_fetch_inc_int32(&task_migrated_per_tp); + if (parsec_migrate_statistics) + { + if (execution_level == 2) + device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + else + device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + if (device_affinity) + device_info[dealer_device_index].affinity_count++; + } + + return migrated_gpu_task->migrate_status; } /** - * @brief Select task from the different device queues using a single try on each device queue. - * If first try in a queue fails (that is if the first task is not a compute task or a task that - * is already migrated) we move on to the next queue. + * @brief Select the victim task for migration. 
* * @param es + * @param ring: ring of selected task returned by the selection policy * @param dealer_device - * @param migrated_gpu_task + * @param starving_device + * @param selection_type: mca parameter * @return int */ -int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t **migrated_gpu_task) +int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, + parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, + int selection_type) { - (void)es; - int j = 0; - int execution_level = 0; - *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(&(dealer_device->pending)); // level 0 + int deal_success = 0; - if (*migrated_gpu_task != NULL) - { - /** - * @brief if the task is a not a computational kerenel or if it is a task that has - * already been migrated, we stop the migration and push it back to the queue. - */ - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) - { - parsec_list_push_back(&(dealer_device->pending), (parsec_list_item_t *) *migrated_gpu_task); - *migrated_gpu_task = NULL; - } + if (selection_type == 0) + deal_success = single_try_selection(es, dealer_device, starving_device, ring); + else if (selection_type == 1) // default + deal_success = single_pass_selection(es, dealer_device, starving_device, ring); + else if (selection_type == 2) + deal_success = two_pass_selection(es, dealer_device, starving_device, ring); + else if (selection_type == 3) + deal_success = affinity_only_selection(es, dealer_device, starving_device, ring); - execution_level = 0; - } + return deal_success; +} - if (*migrated_gpu_task == NULL) - { - // level1 - task is aavailble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[0]->fifo_pending); // level 1 +int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, int stage_in_status, + int pass_count, int selection_type, int execution_level, parsec_list_t *ring, + int *tries, int *deal_success) +{ + parsec_list_item_t *item = NULL; + parsec_gpu_task_t *task = NULL; + int device_affinity; - if (*migrated_gpu_task != NULL) - { - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED) - { - parsec_list_push_back(dealer_device->exec_stream[0]->fifo_pending, (parsec_list_item_t *) *migrated_gpu_task); - *migrated_gpu_task = NULL; - } - } - execution_level = 1; + assert(list != NULL); - if (*migrated_gpu_task == NULL) + if (selection_type == SINGLE_TRY_SELECTION) + { + do { - for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + *tries += 1; + task = (parsec_gpu_task_t *)parsec_list_pop_back(list); + if (task != NULL) { - // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_back(dealer_device->exec_stream[(2 + j)]->fifo_pending); // level2 - - if (*migrated_gpu_task != NULL) + if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) { - if ((*migrated_gpu_task)->task_type != PARSEC_GPU_TASK_TYPE_KERNEL || (*migrated_gpu_task)->migrate_status > TASK_NOT_MIGRATED ) - { - parsec_list_push_back(dealer_device->exec_stream[(2 + j)]->fifo_pending, (parsec_list_item_t *) *migrated_gpu_task); - *migrated_gpu_task = NULL; - } + set_migrate_status(dealer_device, starving_device, task, execution_level); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); + parsec_list_push_front(ring, (parsec_list_item_t *)task); + *deal_success += 1; } - - if (*migrated_gpu_task != NULL) + 
else { - execution_level = 2; - break; + parsec_list_push_back(list, (parsec_list_item_t *)task); + task = NULL; } } - } // end of j - } + } while ((*tries < parsec_cuda_migrate_chunk_size) && (task != NULL)); - return execution_level; -} - -parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, - int pass_count, int selection_type) -{ - parsec_list_item_t *item = NULL; - parsec_gpu_task_t *task = NULL; - int device_affinity; - - assert(list != NULL); + return *deal_success; + } parsec_list_lock(list); - if ( (pass_count == SECOND_PASS) || (selection_type == SINGLE_PASS_SELECTION) ) + if ((pass_count == SECOND_PASS) || (selection_type == SINGLE_PASS_SELECTION)) { - for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + for (item = PARSEC_LIST_ITERATOR_FIRST(list); + (PARSEC_LIST_ITERATOR_END(list) != item) && (*tries < parsec_cuda_migrate_chunk_size); + item = PARSEC_LIST_ITERATOR_NEXT(item)) { task = (parsec_gpu_task_t *)item; - if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) - break; + if ((task != NULL) && (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) + { + /** + * parsec_list_nolock_remove returns the previous element in the + * linked list. This will preserve the chain of iteration. 
+ */ + item = parsec_list_nolock_remove(list, item); + set_migrate_status(dealer_device, starving_device, task, execution_level); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); + parsec_list_push_front(ring, (parsec_list_item_t *)task); + *tries = *tries + 1; + *deal_success += 1; + } } } - else if ( (pass_count == FIRST_PASS) || (selection_type == AFFINITY_ONLY_SELECTION) ) + else if ((pass_count == FIRST_PASS) || (selection_type == AFFINITY_ONLY_SELECTION)) { - for (item = PARSEC_LIST_ITERATOR_FIRST(list); PARSEC_LIST_ITERATOR_END(list) != item; item = PARSEC_LIST_ITERATOR_NEXT(item)) + for (item = PARSEC_LIST_ITERATOR_FIRST(list); + (PARSEC_LIST_ITERATOR_END(list) != item) && (*tries < parsec_cuda_migrate_chunk_size); + item = PARSEC_LIST_ITERATOR_NEXT(item)) { task = (parsec_gpu_task_t *)item; device_affinity = find_task_affinity(task, starving_device->super.device_index, stage_in_status); - if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED) && (device_affinity > 0)) - break; + if ((task != NULL) && (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && + (task->migrate_status == TASK_NOT_MIGRATED) && (device_affinity > 0)) + { + item = parsec_list_nolock_remove(list, item); + set_migrate_status(dealer_device, starving_device, task, execution_level); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); + parsec_list_push_front(ring, (parsec_list_item_t *)task); + *tries = *tries + 1; + *deal_success += 1; + } } } parsec_list_unlock(list); - if ((item != NULL) && (PARSEC_LIST_ITERATOR_END(list) != item)) + return *deal_success; +} + +/** + * @brief Tries to select the first task in a queue. If that fails it moves on to the + * next queue. 
+ */ + +int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring) +{ + int j = 0; + int execution_level = 0; + /** + * @brief Keep tracks of the number of times we try to select a task. + * Upper limit is the parsec_cuda_migrate_chunk_size set by the + * mca parameter + */ + int tries = 0; + /** + * @brief Keeps track of the number of successfull tasks migrated. + * This is very important as this value is deducted from the total tasks + * the dealer device will execute + * + */ + int deal_success = 0; + + (void)es; + + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_TRY_SELECTION, execution_level, ring, &tries, &deal_success); + + if (tries < parsec_cuda_migrate_chunk_size) { - parsec_list_nolock_remove(list, item); - return item; + // level1 - task is availble in the stage_in queue. Stage_in not started. + execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_TRY_SELECTION, execution_level, ring, &tries, &deal_success); + + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + { + if (tries < parsec_cuda_migrate_chunk_size) + { + // level2 - task is available in one of the execution queue stage_in is complete + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, SINGLE_TRY_SELECTION, execution_level, ring, &tries, &deal_success); + } + else + break; + } } - return NULL; + return deal_success; } /** * @brief Select task from the different device queues using a single pass through the - * device queues. - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int + * device queues. 
*/ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring) { int j = 0; int execution_level = 0; + /** + * @brief Keep tracks of the number of times we try to select a task. + * Upper limit is the parsec_cuda_migrate_chunk_size set by the + * mca parameter + */ + int tries = 0; + /** + * @brief Keeps track of the number of successfull tasks migrated. + * This is very important as this value is deducted from the total tasks + * the dealer device will execute + * + */ + int deal_success = 0; - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION); - execution_level = 0; + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + if (tries < parsec_cuda_migrate_chunk_size) { // level1 - task is availble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION); execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, SINGLE_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { - for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + if (tries < parsec_cuda_migrate_chunk_size) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, - TASK_MIGRATED_AFTER_STAGE_IN, -1, SINGLE_PASS_SELECTION); - - if (*migrated_gpu_task != NULL) - { - execution_level = 2; - break; - } + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, SINGLE_PASS_SELECTION, execution_level, ring, &tries, &deal_success); } - } // end of j + else + break; + } } (void)es; - return execution_level; + return deal_success; } /** * @brief Select task from the different device queues using a two pass through the * device queues. The first pass only selects a task with an affinity to the starving - * device. If the first pass does not yield any tasks, the second pass selects any available - * compute tasks. - * - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int + * device. If the first pass does not yield the required number of tasks, the + * second pass selects any available compute tasks. 
*/ int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring) { int j = 0; int execution_level = 0; + /** + * @brief Keep tracks of the number of times we try to select a task. + * Upper limit is the parsec_cuda_migrate_chunk_size set by the + * mca parameter + */ + int tries = 0; + /** + * @brief Keeps track of the number of successfull tasks migrated. + * This is very important as this value is deducted from the total tasks + * the dealer device will execute + * + */ + int deal_success = 0; + + (void)es; // FIRST PASS - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); - execution_level = 0; + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + if (tries < parsec_cuda_migrate_chunk_size) { // level1 - task is availble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { - for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + if (tries < parsec_cuda_migrate_chunk_size) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, - TASK_MIGRATED_AFTER_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION); - - if (*migrated_gpu_task != NULL) - { - execution_level = 2; - break; - } + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, FIRST_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); } - } // end of j + else + break; + } } // SECOND PASS - if (*migrated_gpu_task == NULL) + if (tries < parsec_cuda_migrate_chunk_size) { - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); execution_level = 0; + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + if (tries < parsec_cuda_migrate_chunk_size) { // level1 - task is availble in the stage_in queue. Stage_in not started. 
- *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { - for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + if (tries < parsec_cuda_migrate_chunk_size) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, - TASK_MIGRATED_AFTER_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION); - - if (*migrated_gpu_task != NULL) - { - execution_level = 2; - break; - } + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, SECOND_PASS, TWO_PASS_SELECTION, execution_level, ring, &tries, &deal_success); } - } // end of j + else + break; + } } } - (void )es; - - return execution_level; + return deal_success; } - /** - * @brief Select task from the different device queues using a single pass through the - * device queues. - * @param es - * @param dealer_device - * @param migrated_gpu_task - * @return int + * @brief Select only the tasks with an affinity with the starving device. */ int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task) + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring) { int j = 0; int execution_level = 0; + /** + * @brief Keep tracks of the number of times we try to select a task. 
+ * Upper limit is the parsec_cuda_migrate_chunk_size set by the + * mca parameter + */ + int tries = 0; + /** + * @brief Keeps track of the number of successfull tasks migrated. + * This is very important as this value is deducted from the total tasks + * the dealer device will execute + * + */ + int deal_success = 0; - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(&(dealer_device->pending), starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); - execution_level = 0; + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + if (tries < parsec_cuda_migrate_chunk_size) { // level1 - task is availble in the stage_in queue. Stage_in not started. - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, starving_device, - TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, AFFINITY_ONLY_SELECTION, execution_level, ring, &tries, &deal_success); - if (*migrated_gpu_task == NULL) + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) { - for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + if (tries < parsec_cuda_migrate_chunk_size) { // level2 - task is available in one of the execution queue stage_in is complete - *migrated_gpu_task = (parsec_gpu_task_t *)find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, starving_device, - TASK_MIGRATED_AFTER_STAGE_IN, -1, AFFINITY_ONLY_SELECTION); - - if (*migrated_gpu_task != NULL) - { - execution_level = 2; - break; - } + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, 
AFFINITY_ONLY_SELECTION, execution_level, ring, &tries, &deal_success); } - } // end of j + else + break; + } } (void)es; - return execution_level; + return deal_success; } /** @@ -819,7 +835,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ while (!parsec_list_nolock_is_empty(ring)) { - migrated_gpu_task = (parsec_gpu_task_t *) parsec_list_pop_front(ring); + migrated_gpu_task = (parsec_gpu_task_t *)parsec_list_pop_front(ring); assert(migrated_gpu_task != NULL); /** @@ -833,14 +849,13 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_OBJ_CONSTRUCT(mig_task, parsec_list_item_t); mig_task->gpu_task = migrated_gpu_task; - for (k = 0; k < MAX_PARAM_COUNT; k++) - migrated_gpu_task->candidate[i] = NULL; + for (k = 0; k < MAX_PARAM_COUNT; k++) migrated_gpu_task->candidate[i] = NULL; mig_task->dealer_device = dealer_device; mig_task->starving_device = starving_device; mig_task->stage_in_status = migrated_gpu_task->migrate_status; -#if defined(PARSEC_PROF_TRACE) + #if defined(PARSEC_PROF_TRACE) migrated_gpu_task->select_time = time_stamp(); -#endif + #endif PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); @@ -879,7 +894,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t { int i = 0; parsec_task_t *task = gpu_task->ec; - + /** * Data is already staged in the dealer device and we can find all the data * of the tasks to be migrated in the dealer device. 
@@ -921,7 +936,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || + if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || task->data[i].data_out->version > task->data[i].data_in->version) { task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 7aa11579f..1ffbecd42 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -13,16 +13,16 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) -#define TASK_NOT_MIGRATED 0 +#define TASK_NOT_MIGRATED 0 #define TASK_MIGRATED_BEFORE_STAGE_IN 1 -#define TASK_MIGRATED_AFTER_STAGE_IN 2 +#define TASK_MIGRATED_AFTER_STAGE_IN 2 -#define SINGLE_TRY_SELECTION 0 -#define SINGLE_PASS_SELECTION 1 -#define TWO_PASS_SELECTION 2 -#define AFFINITY_ONLY_SELECTION 3 +#define SINGLE_TRY_SELECTION 0 +#define SINGLE_PASS_SELECTION 1 +#define TWO_PASS_SELECTION 2 +#define AFFINITY_ONLY_SELECTION 3 -#define FIRST_PASS 1 +#define FIRST_PASS 1 #define SECOND_PASS 2 /** @@ -123,15 +123,17 @@ int inc_compute_tasks_executed(int device_index); int get_compute_tasks_executed(int device_index); int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int two_pass_selection(parsec_execution_stream_t *es, 
parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_gpu_task_t **migrated_gpu_task); + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, - parsec_device_gpu_module_t *starving_device, parsec_gpu_task_t **migrated_gpu_task); -parsec_list_item_t* find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *starving_device, int stage_in_status, - int pass_count, int selection_type); + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); +int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, int stage_in_status, + int pass_count, int selection_type, int execution_level, + parsec_list_t *ring, int *tries, int *deal_success); int parsec_cuda_inc_eviction_count(int device_index); #endif From 44ffba98a6e896bdad686c2db68b5ec3efc1a10e Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 25 Oct 2022 07:15:46 -0400 Subject: [PATCH 191/215] statistics updated to count the number of stage in and the perc of evictions wrt stage in. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 30 +++++++++++++++++++- parsec/mca/device/cuda/device_cuda_migrate.h | 6 +++- parsec/mca/device/cuda/device_cuda_module.c | 6 ++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index b9c135cdd..4e446948f 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -76,6 +76,9 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].success_count = 0; device_info[i].ready_compute_tasks = 0; device_info[i].affinity_count = 0; + device_info[i].evictions = 0; + device_info[i].nb_stage_in = 0; + device_info[i].nb_stage_in_req= 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -101,7 +104,7 @@ int parsec_cuda_migrate_fini() int summary_total_tasks_migrated = 0, summary_total_l0_tasks_migrated = 0, summary_total_l1_tasks_migrated = 0, summary_total_l2_tasks_migrated = 0; int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; - int summary_total_evictions = 0; + int summary_total_evictions = 0, summary_total_stage_in = 0, summary_total_stage_in_req; #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); @@ -125,6 +128,8 @@ int parsec_cuda_migrate_fini() deal_success_perc = (((float)device_info[i].success_count) / ((float)device_info[i].deal_count)) * 100; avg_task_migrated_per_sucess = ((float)tot_task_migrated) / ((float)device_info[i].success_count); summary_total_evictions += device_info[i].evictions; + summary_total_stage_in += device_info[i].nb_stage_in; + summary_total_stage_in_req = device_info[i].nb_stage_in_req; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); @@ -148,6 +153,10 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per 
successfull deal : %lf \n", avg_task_migrated_per_sucess); printf("Perc of successfull deals : %lf \n", deal_success_perc); printf("Evictions : %d \n", device_info[i].evictions); + printf("Stage in initiated : %d \n", device_info[i].nb_stage_in); + printf("Stage in required : %d \n", device_info[i].nb_stage_in_req); + printf("Perc eviction for stage in initiated : %lf \n", (( (float)device_info[i].evictions / device_info[i].nb_stage_in) * 100 ) ); + printf("Perc eviction for stage in required : %lf \n", (((float)device_info[i].evictions / device_info[i].nb_stage_in_req) * 100 )); } printf("\n *********** SUMMARY *********** \n"); @@ -169,7 +178,14 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per deal : %lf \n", summary_avg_task_migrated); printf("Avg task migrated per successfull deal : %lf \n", summary_avg_task_migrated_per_sucess); printf("perc of successfull deals : %lf \n", summary_deal_success_perc); + printf("Total evictions : %d \n", summary_total_evictions); + printf("Total stage in initiated : %d \n", summary_total_stage_in); + printf("Total stage in required : %d \n", summary_total_stage_in_req); + printf("Perc eviction for stage in initiated : %lf \n", (((float)summary_total_evictions / summary_total_stage_in) * 100 ) ); + printf("Perc eviction for stage in required : %lf \n", (((float)summary_total_evictions / summary_total_stage_in_req) * 100 ) ); + + if (parsec_cuda_migrate_task_selection == 0) printf("Task selection : single-try \n"); @@ -252,6 +268,18 @@ int parsec_cuda_inc_eviction_count(int device) return rc + 1; } +int parsec_cuda_inc_stage_in_count(int device) +{ + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].nb_stage_in), 1); + return rc + 1; +} + +int parsec_cuda_inc_stage_in_req_count(int device) +{ + int rc = parsec_atomic_fetch_add_int32(&(device_info[device].nb_stage_in_req), 1); + return rc + 1; +} + /** * @brief returns 1 if the device is starving, 0 if its is not * diff --git 
a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 1ffbecd42..7b2a27396 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -49,8 +49,10 @@ typedef struct parsec_device_cuda_info_s int success_count; int ready_compute_tasks; int total_compute_tasks; - int evictions; int affinity_count; + int evictions; + int nb_stage_in; + int nb_stage_in_req; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -135,5 +137,7 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d int pass_count, int selection_type, int execution_level, parsec_list_t *ring, int *tries, int *deal_success); int parsec_cuda_inc_eviction_count(int device_index); +int parsec_cuda_inc_stage_in_count(int device); +int parsec_cuda_inc_stage_in_req_count(int device); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3f4d03f20..3050c77e6 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1550,6 +1550,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } /* Do not need to be tranferred */ + if (parsec_migrate_statistics) + parsec_cuda_inc_stage_in_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + if( -1 == transfer_from ) { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; @@ -1561,6 +1564,9 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } else { + if (parsec_migrate_statistics) + parsec_cuda_inc_stage_in_req_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + /* Update the transferred required_data_in size */ gpu_device->super.required_data_in += original->nb_elts; From 14b72d93d4004eb79f2d1584896c456c3958ecb8 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 26 Oct 2022 23:02:51 -0400 Subject: [PATCH 192/215] data reuse selection policy implemented ---
parsec/mca/device/cuda/device_cuda_migrate.c | 185 ++++++++++++++++--- parsec/mca/device/cuda/device_cuda_migrate.h | 18 +- 2 files changed, 174 insertions(+), 29 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 4e446948f..eafd94e14 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -187,14 +187,16 @@ int parsec_cuda_migrate_fini() - if (parsec_cuda_migrate_task_selection == 0) + if (parsec_cuda_migrate_task_selection == SINGLE_TRY_SELECTION) printf("Task selection : single-try \n"); - else if (parsec_cuda_migrate_task_selection == 1) + else if (parsec_cuda_migrate_task_selection == SINGLE_PASS_SELECTION) printf("Task selection : single-pass \n"); - else if (parsec_cuda_migrate_task_selection == 2) + else if (parsec_cuda_migrate_task_selection == TWO_PASS_SELECTION) printf("Task selection : two-pass \n"); - else + else if (parsec_cuda_migrate_task_selection == AFFINITY_ONLY_SELECTION) printf("Task selection : affinity-only \n"); + else if (parsec_cuda_migrate_task_selection == DATA_REUSE_SELECTION) + printf("Task selection : data-reuse \n"); if (parsec_cuda_delegate_task_completion == 0) printf("Task completion : not delegated\n"); @@ -415,7 +417,7 @@ int set_migrate_status(parsec_device_gpu_module_t *dealer_device, parsec_device_ parsec_gpu_task_t *migrated_gpu_task, int execution_level) { int dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - int device_affinity = 0; + int affinity = 0; /** * @brief change migrate_status according to the status of the stage in of the @@ -452,10 +454,10 @@ int set_migrate_status(parsec_device_gpu_module_t *dealer_device, parsec_device_ if (parsec_migrate_statistics) { if (execution_level == 2) - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + affinity = 
find_task_affinity_to_starving_node(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); else - device_affinity = find_task_affinity(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); - if (device_affinity) + affinity = find_task_affinity_to_starving_node(migrated_gpu_task, starving_device->super.device_index, TASK_MIGRATED_BEFORE_STAGE_IN); + if (affinity) device_info[dealer_device_index].affinity_count++; } @@ -479,14 +481,16 @@ int select_tasks(parsec_execution_stream_t *es, parsec_list_t *ring, { int deal_success = 0; - if (selection_type == 0) + if (selection_type == SINGLE_TRY_SELECTION) deal_success = single_try_selection(es, dealer_device, starving_device, ring); - else if (selection_type == 1) // default + else if (selection_type == SINGLE_PASS_SELECTION) // default deal_success = single_pass_selection(es, dealer_device, starving_device, ring); - else if (selection_type == 2) + else if (selection_type == TWO_PASS_SELECTION) deal_success = two_pass_selection(es, dealer_device, starving_device, ring); - else if (selection_type == 3) + else if (selection_type == AFFINITY_ONLY_SELECTION) deal_success = affinity_only_selection(es, dealer_device, starving_device, ring); + else if (selection_type == DATA_REUSE_SELECTION) + deal_success = data_reuse_selection(es, dealer_device, starving_device, ring); return deal_success; } @@ -498,11 +502,11 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d { parsec_list_item_t *item = NULL; parsec_gpu_task_t *task = NULL; - int device_affinity; + int affinity; assert(list != NULL); - if (selection_type == SINGLE_TRY_SELECTION) + if ( selection_type == SINGLE_TRY_SELECTION ) { do { @@ -514,7 +518,7 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d { set_migrate_status(dealer_device, starving_device, task, execution_level); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); - 
parsec_list_push_front(ring, (parsec_list_item_t *)task); + parsec_list_push_back(ring, (parsec_list_item_t *)task); *deal_success += 1; } else @@ -530,7 +534,40 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d parsec_list_lock(list); - if ((pass_count == SECOND_PASS) || (selection_type == SINGLE_PASS_SELECTION)) + if ( selection_type == DATA_REUSE_SELECTION ) + { + + + for (item = PARSEC_LIST_ITERATOR_FIRST(list); + (PARSEC_LIST_ITERATOR_END(list) != item) && (*tries < parsec_cuda_migrate_chunk_size); + item = PARSEC_LIST_ITERATOR_NEXT(item)) + { + parsec_gpu_task_t *selected_task; + if(parsec_list_nolock_is_empty( ring )) + selected_task = NULL; + else + selected_task = (parsec_gpu_task_t *)PARSEC_LIST_ITERATOR_LAST(ring); + + task = (parsec_gpu_task_t *)item; + affinity = find_task_to_task_affinity(selected_task, task, stage_in_status); + if ((task != NULL) && (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && + (task->migrate_status == TASK_NOT_MIGRATED) && (affinity > 0)) + { + item = parsec_list_nolock_remove(list, item); + set_migrate_status(dealer_device, starving_device, task, execution_level); + PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); + parsec_list_push_back(ring, (parsec_list_item_t *)task); + *tries = *tries + 1; + *deal_success += 1; + } + } + + + + } + + if (( pass_count == SECOND_PASS && selection_type == TWO_PASS_SELECTION ) || + ( selection_type == SINGLE_PASS_SELECTION )) { for (item = PARSEC_LIST_ITERATOR_FIRST(list); (PARSEC_LIST_ITERATOR_END(list) != item) && (*tries < parsec_cuda_migrate_chunk_size); @@ -547,28 +584,29 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d item = parsec_list_nolock_remove(list, item); set_migrate_status(dealer_device, starving_device, task, execution_level); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); - parsec_list_push_front(ring, (parsec_list_item_t *)task); + parsec_list_push_back(ring, (parsec_list_item_t *)task); 
*tries = *tries + 1; *deal_success += 1; } } } - else if ((pass_count == FIRST_PASS) || (selection_type == AFFINITY_ONLY_SELECTION)) + else if (( pass_count == FIRST_PASS && selection_type == TWO_PASS_SELECTION ) || + ( selection_type == AFFINITY_ONLY_SELECTION)) { for (item = PARSEC_LIST_ITERATOR_FIRST(list); (PARSEC_LIST_ITERATOR_END(list) != item) && (*tries < parsec_cuda_migrate_chunk_size); item = PARSEC_LIST_ITERATOR_NEXT(item)) { task = (parsec_gpu_task_t *)item; - device_affinity = find_task_affinity(task, starving_device->super.device_index, stage_in_status); + affinity = find_task_affinity_to_starving_node(task, starving_device->super.device_index, stage_in_status); if ((task != NULL) && (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && - (task->migrate_status == TASK_NOT_MIGRATED) && (device_affinity > 0)) + (task->migrate_status == TASK_NOT_MIGRATED) && (affinity > 0)) { item = parsec_list_nolock_remove(list, item); set_migrate_status(dealer_device, starving_device, task, execution_level); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); - parsec_list_push_front(ring, (parsec_list_item_t *)task); + parsec_list_push_back(ring, (parsec_list_item_t *)task); *tries = *tries + 1; *deal_success += 1; } @@ -822,6 +860,58 @@ int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_mod return deal_success; } +/** + * @brief Tries to select tasks that has common data between each other. + * This will ensure a degree of data resuse in the starving node. + */ + +int data_reuse_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring) +{ + int j = 0; + int execution_level = 0; + /** + * @brief Keep tracks of the number of times we try to select a task. + * Upper limit is the parsec_cuda_migrate_chunk_size set by the + * mca parameter + */ + int tries = 0; + /** + * @brief Keeps track of the number of successfull tasks migrated. 
+ * This is very important as this value is deducted from the total tasks + * the dealer device will execute + * + */ + int deal_success = 0; + + find_compute_tasks(&(dealer_device->pending), dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, DATA_REUSE_SELECTION, execution_level, ring, &tries, &deal_success); + + if (tries < parsec_cuda_migrate_chunk_size) + { + // level1 - task is availble in the stage_in queue. Stage_in not started. + execution_level = 1; + find_compute_tasks(dealer_device->exec_stream[0]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_BEFORE_STAGE_IN, -1, DATA_REUSE_SELECTION, execution_level, ring, &tries, &deal_success); + + for (j = 0; j < (dealer_device->num_exec_streams - 2); j++) + { + if (tries < parsec_cuda_migrate_chunk_size) + { + // level2 - task is available in one of the execution queue stage_in is complete + execution_level = 2; + find_compute_tasks(dealer_device->exec_stream[(2 + j)]->fifo_pending, dealer_device, starving_device, + TASK_MIGRATED_AFTER_STAGE_IN, -1, DATA_REUSE_SELECTION, execution_level, ring, &tries, &deal_success); + } + else + break; + } + } + + (void)es; + return deal_success; +} + /** * @brief check if there are any devices starving. If there are any starving device migrate * task from the dealer device to the starving device. 
@@ -1164,7 +1254,7 @@ int get_compute_tasks_executed(int device_index) return device_info[device_index].total_compute_tasks; } -int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status) +int find_task_affinity_to_starving_node(parsec_gpu_task_t *gpu_task, int device_index, int status) { int i; parsec_data_t *original = NULL; @@ -1203,3 +1293,54 @@ int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status return 0; } + +int find_task_to_task_affinity(parsec_gpu_task_t *first_gpu_task, parsec_gpu_task_t *sec_gpu_task, int status) +{ + int i, j, affinity = 0; + parsec_task_t *first_task = NULL; + parsec_task_t *sec_task = NULL; + parsec_data_copy_t *first_task_data_copy = NULL; + parsec_data_copy_t *sec_task_data_copy = NULL; + + /** + * @brief first_gpu_task is NULL implies there is no task selected for migration. + * So make sec_gpu_task task the first to be selected task; + */ + if( first_gpu_task == NULL ) + return 1; + + first_task = first_gpu_task->ec; + sec_task = sec_gpu_task->ec; + + for (i = 0; i < sec_task->task_class->nb_flows; i++) + { + if (NULL == sec_task->data[i].data_in || NULL == sec_task->data[i].source_repo_entry) + continue; + + if (status == TASK_MIGRATED_BEFORE_STAGE_IN) // data will be trasferred from data_in + sec_task_data_copy = sec_task->data[i].data_in; + else + sec_task_data_copy = sec_task->data[i].data_out; + + for (j = 0; j < first_task->task_class->nb_flows; j++) + { + if (NULL == first_task->data[j].data_in || NULL == sec_task->data[i].source_repo_entry) + continue; + + if (first_gpu_task->migrate_status == TASK_MIGRATED_BEFORE_STAGE_IN) // data will be trasferred from data_in + first_task_data_copy = first_task->data[j].data_in; + else + first_task_data_copy = first_task->data[j].data_out; + + if( sec_task_data_copy == first_task_data_copy) + { + affinity = 1; + break; + } + } + + return affinity; + } + + return affinity; +} diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h 
b/parsec/mca/device/cuda/device_cuda_migrate.h index 7b2a27396..fddc99969 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -13,16 +13,17 @@ #define CUDA_DEVICE_NUM(DEVICE_NUM) (DEVICE_NUM - 2) #define DEVICE_NUM(CUDA_DEVICE_NUM) (CUDA_DEVICE_NUM + 2) -#define TASK_NOT_MIGRATED 0 +#define TASK_NOT_MIGRATED 0 #define TASK_MIGRATED_BEFORE_STAGE_IN 1 -#define TASK_MIGRATED_AFTER_STAGE_IN 2 +#define TASK_MIGRATED_AFTER_STAGE_IN 2 -#define SINGLE_TRY_SELECTION 0 -#define SINGLE_PASS_SELECTION 1 -#define TWO_PASS_SELECTION 2 +#define SINGLE_TRY_SELECTION 0 +#define SINGLE_PASS_SELECTION 1 +#define TWO_PASS_SELECTION 2 #define AFFINITY_ONLY_SELECTION 3 +#define DATA_REUSE_SELECTION 4 -#define FIRST_PASS 1 +#define FIRST_PASS 1 #define SECOND_PASS 2 /** @@ -123,7 +124,8 @@ int dec_compute_task_count(int device_index); int inc_compute_task_count(int device_index); int inc_compute_tasks_executed(int device_index); int get_compute_tasks_executed(int device_index); -int find_task_affinity(parsec_gpu_task_t *gpu_task, int device_index, int status); +int find_task_affinity_to_starving_node(parsec_gpu_task_t *gpu_task, int device_index, int status); +int find_task_to_task_affinity(parsec_gpu_task_t *gpu_task, parsec_gpu_task_t *first_task, int status); int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int two_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, @@ -132,6 +134,8 @@ int single_try_selection(parsec_execution_stream_t *es, parsec_device_gpu_module parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int affinity_only_selection(parsec_execution_stream_t *es, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); +int data_reuse_selection(parsec_execution_stream_t *es, 
parsec_device_gpu_module_t *dealer_device, + parsec_device_gpu_module_t *starving_device, parsec_list_t *ring); int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_device, parsec_device_gpu_module_t *starving_device, int stage_in_status, int pass_count, int selection_type, int execution_level, From 3855bf487dd6371d71e34e452cbc114bda24f6b7 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 27 Oct 2022 02:55:06 -0400 Subject: [PATCH 193/215] stage in calculation corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index eafd94e14..d08e21073 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -129,7 +129,7 @@ int parsec_cuda_migrate_fini() avg_task_migrated_per_sucess = ((float)tot_task_migrated) / ((float)device_info[i].success_count); summary_total_evictions += device_info[i].evictions; summary_total_stage_in += device_info[i].nb_stage_in; - summary_total_stage_in_req = device_info[i].nb_stage_in_req; + summary_total_stage_in_req += device_info[i].nb_stage_in_req; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); From 492b72daf9c073e4ae0ea792bdc7bec28f3dcacd Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 29 Oct 2022 20:58:47 -0400 Subject: [PATCH 194/215] header file added --- parsec/parsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parsec/parsec.c b/parsec/parsec.c index 8c7adc1ae..ddad7caff 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -66,6 +66,8 @@ #include #endif +#include "parsec/mca/device/cuda/device_cuda_migrate.h" + /* * Global variables. 
*/ From c91a6b7e55ab724dce0ee83ee2796e74181b5fbf Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 29 Oct 2022 22:31:10 -0400 Subject: [PATCH 195/215] Task migration delegated to another thread. The second thread that offloads a task to the device is transitioned to a manager that handles only task migration. --- parsec/mca/device/cuda/device_cuda_migrate.c | 8 +++ parsec/mca/device/cuda/device_cuda_migrate.h | 3 +- parsec/mca/device/cuda/device_cuda_module.c | 73 +++++++++++++++++--- parsec/mca/device/device_gpu.h | 1 + 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d08e21073..d043a3440 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -4,6 +4,7 @@ extern int parsec_device_cuda_enabled; extern int parsec_migrate_statistics; +extern int parsec_cuda_migrate_tasks; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) extern int parsec_cuda_delegate_task_completion; // task completion delegation @@ -203,6 +204,13 @@ int parsec_cuda_migrate_fini() else printf("Task completion : delegated\n"); + if ( parsec_cuda_migrate_tasks == 0) + printf("Migration : no migration \n"); + else if ( parsec_cuda_migrate_tasks == 1) + printf("Migration : not delegated \n"); + else + printf("Migration : delegated \n"); + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double)time_stamp() / 1000000000); } diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index fddc99969..7cc5c3e4e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -143,5 +143,6 @@ int find_compute_tasks(parsec_list_t *list,
parsec_device_gpu_module_t *dealer_d int parsec_cuda_inc_eviction_count(int device_index); int parsec_cuda_inc_stage_in_count(int device); int parsec_cuda_inc_stage_in_req_count(int device); - +parsec_hook_return_t parsec_cuda_migrate_manager( parsec_execution_stream_t *es, + parsec_device_gpu_module_t* gpu_device ); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 3050c77e6..8205f6e5b 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -53,7 +53,6 @@ extern int parsec_cuda_iterative; extern int parsec_cuda_migrate_chunk_size; extern int parsec_gpu_task_count_start; extern int parsec_gpu_task_count_end; -extern int parsec_cuda_migrate_tasks; extern int parsec_migrate_statistics; extern int parsec_cuda_delegate_task_completion; @@ -377,6 +376,8 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) len = asprintf(&gpu_device->super.name, "%s: cuda(%d)", szName, dev_id); if(-1 == len) { gpu_device->super.name = NULL; goto release_device; } gpu_device->data_avail_epoch = 0; + gpu_device->mutex = 0; + gpu_device->migrate_manager_mutex = 0; gpu_device->max_exec_streams = parsec_cuda_max_streams; gpu_device->exec_stream = @@ -996,10 +997,6 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, if( NULL == gpu_elem->device_private ) { #endif find_another_data: - - if (parsec_migrate_statistics) - parsec_cuda_inc_eviction_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); - /* Look for a data_copy to free */ lru_gpu_elem = (parsec_gpu_data_copy_t*)parsec_list_pop_front(&gpu_device->gpu_mem_lru); if( NULL == lru_gpu_elem ) { @@ -1199,6 +1196,10 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, __FILE__, __LINE__); PARSEC_OBJ_RELEASE(lru_gpu_elem); assert( NULL == lru_gpu_elem ); + + if (parsec_migrate_statistics) + parsec_cuda_inc_eviction_count( 
CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + goto malloc_data; } PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream, @@ -2209,9 +2210,10 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, assert(*out_task != NULL); rc = cudaEventQuery(cuda_stream->begin_events[stream->end]); assert( cudaSuccess == rc ); + event_duration = 0; rc = cudaEventElapsedTime( &event_duration, cuda_stream->begin_events[stream->end], cuda_stream->events[stream->end] ); assert( cudaSuccess == rc ); - assert( event_duration >=0 ); + assert( event_duration >= 0 ); /** * cudaEventQuery() return time in milli sec, with a resolution of .5 micro sec. * time_stamp() return time in nano sec. @@ -2863,7 +2865,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device; parsec_device_cuda_module_t *cuda_device; cudaError_t status; - int rc, exec_stream = 0, nb_migrated = 0; + int rc, rc1, exec_stream = 0, nb_migrated = 0; parsec_gpu_task_t *progress_task, *out_task_submit = NULL, *out_task_pop = NULL; #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; @@ -2893,7 +2895,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * not in the queue yet. */ while(1) { - rc = gpu_device->mutex; + rc = rc1 = gpu_device->mutex; struct timespec delay; if( rc >= 0 ) { if( parsec_atomic_cas_int32( &gpu_device->mutex, rc, rc+1 ) ) { @@ -2947,6 +2949,25 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( 0 < rc ) { parsec_fifo_push( &(gpu_device->pending), (parsec_list_item_t*)gpu_task ); + + /** + * @brief + * parsec_cuda_migrate_tasks == 0 : no task migration + * parsec_cuda_migrate_tasks == 1 : task will be migrated by the manager thread + * parsec_cuda_migrate_tasks == 2 : task will be migrate a different thread + * + */ + if(parsec_cuda_migrate_tasks == 2) + { + /** + * @brief 'rc1 == 1' is important or the manager thread will transition + * to migrate manager. 
'migrate_manager_mutex == 0' will ensure that there + * is only one migrate manager per device. + */ + if( rc1 == 1 && gpu_device->migrate_manager_mutex == 0 ) + parsec_cuda_migrate_manager(es, gpu_device); + } + return PARSEC_HOOK_RETURN_ASYNC; } PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Entering GPU management at %s:%d", @@ -3238,7 +3259,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * is deducted from the total number of tasks that will be executed by this * GPU. */ - if(parsec_cuda_migrate_tasks) + if(parsec_cuda_migrate_tasks == 1) { nb_migrated = migrate_to_starving_device(es, gpu_device); if( nb_migrated > 0 ) @@ -3270,4 +3291,38 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, return PARSEC_HOOK_RETURN_DISABLE; } + +parsec_hook_return_t +parsec_cuda_migrate_manager( parsec_execution_stream_t *es, + parsec_device_gpu_module_t* gpu_device ) +{ + int rc = 0, nb_migrated = 0; + + (void)es; + + if( gpu_device->migrate_manager_mutex > 0 ) + return PARSEC_HOOK_RETURN_ASYNC; + else + { + rc = gpu_device->migrate_manager_mutex; + if( !parsec_atomic_cas_int32( &gpu_device->migrate_manager_mutex, rc, rc+1 ) ) + return PARSEC_HOOK_RETURN_ASYNC; + } + + /** + * @brief The migrate_manager thread exits when there are no more + * work to be done. 
+ */ + while( gpu_device->mutex > 0) + { + nb_migrated = migrate_to_starving_device(es, gpu_device); + if( nb_migrated > 0 ) + { + rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); + } + } + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->migrate_manager_mutex) ); + return PARSEC_HOOK_RETURN_ASYNC; +} + #endif /* PARSEC_HAVE_CUDA */ diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 14345c1b7..e93d3ffd4 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -161,6 +161,7 @@ struct parsec_device_gpu_module_s { parsec_gpu_exec_stream_t **exec_stream; size_t mem_block_size; int64_t mem_nb_blocks; + volatile int32_t migrate_manager_mutex; }; struct parsec_gpu_exec_stream_s { From 8bc4c55666e8c34e6adb499d10873c379afa6383 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 31 Oct 2022 21:34:29 -0400 Subject: [PATCH 196/215] lock mechanism generalised for all selection policies. --- parsec/mca/device/cuda/device_cuda_migrate.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index d043a3440..dae1bf48c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -514,34 +514,32 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d assert(list != NULL); + parsec_list_lock(list); + if ( selection_type == SINGLE_TRY_SELECTION ) { - do + do { *tries += 1; - task = (parsec_gpu_task_t *)parsec_list_pop_back(list); + task = (parsec_gpu_task_t *)parsec_list_nolock_pop_back(list); if (task != NULL) { if ((task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) && (task->migrate_status == TASK_NOT_MIGRATED)) { set_migrate_status(dealer_device, starving_device, task, execution_level); PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); - parsec_list_push_back(ring, (parsec_list_item_t *)task); + 
parsec_list_nolock_push_back(ring, (parsec_list_item_t *)task); *deal_success += 1; } else { - parsec_list_push_back(list, (parsec_list_item_t *)task); + parsec_list_nolock_push_back(list, (parsec_list_item_t *)task); task = NULL; } } } while ((*tries < parsec_cuda_migrate_chunk_size) && (task != NULL)); - - return *deal_success; } - parsec_list_lock(list); - if ( selection_type == DATA_REUSE_SELECTION ) { From 526c46a01260b6e4becaccbc5b10c9b6a137050c Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 2 Nov 2022 19:32:38 -0400 Subject: [PATCH 197/215] New mca parameter for an unfair task mapping. Only to be used for testing purposes. --- parsec/mca/device/cuda/device_cuda_component.c | 10 ++++++++-- parsec/mca/device/device.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_component.c b/parsec/mca/device/cuda/device_cuda_component.c index 472dfaafb..58743a457 100644 --- a/parsec/mca/device/cuda/device_cuda_component.c +++ b/parsec/mca/device/cuda/device_cuda_component.c @@ -46,6 +46,7 @@ int parsec_cuda_iterative = 0; int parsec_cuda_migrate_chunk_size = 0; int parsec_cuda_migrate_task_selection = 0; int parsec_cuda_delegate_task_completion = 0; +int parsec_cuda_unfair_mapping = 0; static int cuda_mask, cuda_nvlink_mask; @@ -219,8 +220,13 @@ static int device_cuda_component_register(void) "Integer to choose the method of task selection during migration", false, false, 1, &parsec_cuda_migrate_task_selection); (void)parsec_mca_param_reg_int_name("device_cuda", "delegate_task_completion", - "Integer to choose the whether task completion should be done by a manager thread (default is yes)", - false, false, 1, &parsec_cuda_delegate_task_completion); + "Integer to choose the whether task completion should be done by a manager thread (default is no)", + false, false, 0, &parsec_cuda_delegate_task_completion); + (void)parsec_mca_param_reg_int_name("device_cuda", "unfair_mapping", + "Integer to choose 
the whether the we shpuld have an unfair task mapping. This is used only for testing purposes (default is no)", + false, false, 0, &parsec_cuda_unfair_mapping); + + diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 4fa7086d6..d445d8186 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -47,6 +47,7 @@ static parsec_device_module_t **modules_activated = NULL; static mca_base_component_t **device_components = NULL; extern int parsec_cuda_iterative; +extern int parsec_cuda_unfair_mapping; /** * Temporary solution: Use the following two arrays to taskpool the weight and @@ -195,6 +196,20 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) parsec_task_snprintf(task_str, MAX_TASK_STRLEN, this_task), dev_index, i); } } + + // map all tasks to the first device + if(parsec_cuda_unfair_mapping == 1) + { + dev_index = 2; + } + else if(parsec_cuda_unfair_mapping == 2) // map all tasks to the half of the available device + { + if( dev_index > (parsec_mca_device_enabled() / 2) && dev_index != 2) + { + dev_index = dev_index - (parsec_mca_device_enabled() / 2) + 1 ; + } + } + return dev_index; } From 2b8ff9ebd67539979afadbe3fb8de81ecf6a3192 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 24 Nov 2022 22:19:04 -0500 Subject: [PATCH 198/215] 1. Starvation condition updated. Now it is based on the number of streams in the device. 2. Same starvation condition used on both dealer and starving device. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 38 +++++++------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index dae1bf48c..dc056454c 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -299,30 +299,8 @@ int parsec_cuda_inc_stage_in_req_count(int device) */ int is_starving(int device) { - /** - * @brief The default number of execution stream in PaRSEC is 2. We assume - * starvtion if the number of ready tasks available is less than twice the - * number of execution stream. - */ parsec_device_gpu_module_t *d = (parsec_device_gpu_module_t *)parsec_mca_device_get(DEVICE_NUM(device)); - return (d->mutex < 5) ? 1 : 0; - - // return (parsec_cuda_get_device_task(device, -1) < 5) ? 1 : 0; - // return (get_compute_tasks_executed(device) < 5) ? 1 : 0; -} - -int will_starve(int device) -{ - /** - * @brief The default number of execution stream in PaRSEC is 2. We assume - * starvtion if migrating a task will push the number of ready tasks available - * to less than twice the number of execution stream. - */ - // parsec_device_gpu_module_t* d = parsec_mca_device_get( DEVICE_NUM(device) ); - // return (d->mutex < 5) ? 1 : 0; - - // return ((parsec_cuda_get_device_task(device, -1) - 1) < 5) ? 1 : 0; - return (get_compute_tasks_executed(device) < 5) ? 1 : 0; + return (d->mutex < d->num_exec_streams ) ? 1 : 0; } /** @@ -938,7 +916,7 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ migrated_task_t *mig_task = NULL; dealer_device_index = CUDA_DEVICE_NUM(dealer_device->super.device_index); - if (will_starve(dealer_device_index)) + if (is_starving(dealer_device_index)) return 0; // parse all available device looking for starving devices. 
@@ -984,7 +962,6 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)mig_task); parsec_cuda_mig_task_enqueue(es, mig_task); - device_info[dealer_device_index].last_device = starving_device_index; char tmp[MAX_TASK_STRLEN]; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Task %s migrated (level %d, stage_in %d) from device %d to device %d", parsec_task_snprintf(tmp, MAX_TASK_STRLEN, ((parsec_gpu_task_t *)migrated_gpu_task)->ec), @@ -992,10 +969,19 @@ int migrate_to_starving_device(parsec_execution_stream_t *es, parsec_device_gpu_ } // end while if (deal_success > 0) + { device_info[dealer_device_index].success_count++; + device_info[dealer_device_index].last_device = starving_device_index; + } + else + { + break; + } - if (will_starve(dealer_device_index)) + if (is_starving(dealer_device_index)) + { break; + } } // end for d /* update the expected load on the GPU device */ From ba8f0a139acb3b5d7696788244f1094e1747428e Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 24 Nov 2022 22:32:34 -0500 Subject: [PATCH 199/215] parsec_cuda_migrate_manager() moved to parsec/mca/device/cuda/device_cuda_migrate.c --- parsec/mca/device/cuda/device_cuda_migrate.c | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index dc056454c..6f59727c1 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1336,3 +1336,36 @@ int find_task_to_task_affinity(parsec_gpu_task_t *first_gpu_task, parsec_gpu_tas return affinity; } + +parsec_hook_return_t +parsec_cuda_migrate_manager( parsec_execution_stream_t *es, + parsec_device_gpu_module_t* gpu_device ) +{ + int rc = 0, nb_migrated = 0; + + (void)es; + + if( gpu_device->migrate_manager_mutex > 0 ) + return PARSEC_HOOK_RETURN_ASYNC; + else + { + rc = gpu_device->migrate_manager_mutex; + if( 
!parsec_atomic_cas_int32( &gpu_device->migrate_manager_mutex, rc, rc+1 ) ) + return PARSEC_HOOK_RETURN_ASYNC; + } + + /** + * @brief The migrate_manager thread exits when there are no more + * work to be done. + */ + while( gpu_device->mutex > 0) + { + nb_migrated = migrate_to_starving_device(es, gpu_device); + if( nb_migrated > 0 ) + { + rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); + } + } + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->migrate_manager_mutex) ); + return PARSEC_HOOK_RETURN_ASYNC; +} \ No newline at end of file From 992d60f87e07113797fad6a633331be37bde6b0b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 27 Nov 2022 19:49:13 -0500 Subject: [PATCH 200/215] 1. Task completion offloaded to the co-manager. 2. Functions and variables renamed. 3. More statistics added. --- parsec/mca/device/cuda/device_cuda_migrate.c | 87 +++++++++++++----- parsec/mca/device/cuda/device_cuda_migrate.h | 7 +- parsec/mca/device/cuda/device_cuda_module.c | 92 +++++++++++--------- parsec/mca/device/device_gpu.h | 4 +- 4 files changed, 124 insertions(+), 66 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 6f59727c1..c5aae2f1b 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -66,20 +66,22 @@ int parsec_cuda_migrate_init(int ndevices) { for (j = 0; j < EXECUTION_LEVEL; j++) device_info[i].task_count[j] = 0; - device_info[i].load = 0; - device_info[i].level0 = 0; - device_info[i].level1 = 0; - device_info[i].level2 = 0; + device_info[i].load = 0; + device_info[i].level0 = 0; + device_info[i].level1 = 0; + device_info[i].level2 = 0; device_info[i].total_tasks_executed = 0; - device_info[i].received = 0; - device_info[i].last_device = i; - device_info[i].deal_count = 0; - device_info[i].success_count = 0; - device_info[i].ready_compute_tasks = 0; - device_info[i].affinity_count = 0; - 
device_info[i].evictions = 0; - device_info[i].nb_stage_in = 0; - device_info[i].nb_stage_in_req= 0; + device_info[i].received = 0; + device_info[i].last_device = i; + device_info[i].deal_count = 0; + device_info[i].success_count = 0; + device_info[i].ready_compute_tasks = 0; + device_info[i].affinity_count = 0; + device_info[i].evictions = 0; + device_info[i].nb_stage_in = 0; + device_info[i].nb_stage_in_req = 0; + device_info[i].completed_co_manager = 0; + device_info[i].completed_manager = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -105,7 +107,8 @@ int parsec_cuda_migrate_fini() int summary_total_tasks_migrated = 0, summary_total_l0_tasks_migrated = 0, summary_total_l1_tasks_migrated = 0, summary_total_l2_tasks_migrated = 0; int summary_deals = 0, summary_successful_deals = 0, summary_affinity = 0; float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; - int summary_total_evictions = 0, summary_total_stage_in = 0, summary_total_stage_in_req; + int summary_total_evictions = 0, summary_total_stage_in = 0, summary_total_stage_in_req = 0; + int summary_completed_manager = 0, summary_completed_co_manager = 0; #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); @@ -131,6 +134,8 @@ int parsec_cuda_migrate_fini() summary_total_evictions += device_info[i].evictions; summary_total_stage_in += device_info[i].nb_stage_in; summary_total_stage_in_req += device_info[i].nb_stage_in_req; + summary_completed_manager += device_info[i].completed_manager; + summary_completed_co_manager += device_info[i].completed_co_manager; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); @@ -158,6 +163,8 @@ int parsec_cuda_migrate_fini() printf("Stage in required : %d \n", device_info[i].nb_stage_in_req); printf("Perc eviction for stage in initiated : %lf \n", (( (float)device_info[i].evictions / device_info[i].nb_stage_in) * 100 ) ); printf("Perc 
eviction for stage in required : %lf \n", (((float)device_info[i].evictions / device_info[i].nb_stage_in_req) * 100 )); + printf("Tasks completed by manager : %d \n", device_info[i].completed_manager); + printf("Tasks completed by co-manager : %d \n", device_info[i].completed_co_manager); } printf("\n *********** SUMMARY *********** \n"); @@ -186,6 +193,9 @@ int parsec_cuda_migrate_fini() printf("Perc eviction for stage in initiated : %lf \n", (((float)summary_total_evictions / summary_total_stage_in) * 100 ) ); printf("Perc eviction for stage in required : %lf \n", (((float)summary_total_evictions / summary_total_stage_in_req) * 100 ) ); + printf("Tasks completed by manager : %d \n", summary_completed_manager); + printf("Tasks completed by co-manager : %d \n", summary_completed_co_manager); + if (parsec_cuda_migrate_task_selection == SINGLE_TRY_SELECTION) @@ -1246,6 +1256,18 @@ int get_compute_tasks_executed(int device_index) return device_info[device_index].total_compute_tasks; } +int inc_manager_complete_count(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].completed_manager); + return device_info[device_index].completed_manager; +} + +int inc_co_manager_complete_count(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].completed_co_manager); + return device_info[device_index].completed_co_manager; +} + int find_task_affinity_to_starving_node(parsec_gpu_task_t *gpu_task, int device_index, int status) { int i; @@ -1338,19 +1360,20 @@ int find_task_to_task_affinity(parsec_gpu_task_t *first_gpu_task, parsec_gpu_tas } parsec_hook_return_t -parsec_cuda_migrate_manager( parsec_execution_stream_t *es, +parsec_cuda_co_manager( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device ) { - int rc = 0, nb_migrated = 0; + int rc = 0, nb_migrated = 0, i = 0; + parsec_task_t* task = NULL; (void)es; - if( gpu_device->migrate_manager_mutex > 0 ) + if( gpu_device->co_manager_mutex > 0 ) return 
PARSEC_HOOK_RETURN_ASYNC; else { - rc = gpu_device->migrate_manager_mutex; - if( !parsec_atomic_cas_int32( &gpu_device->migrate_manager_mutex, rc, rc+1 ) ) + rc = gpu_device->co_manager_mutex; + if( !parsec_atomic_cas_int32( &gpu_device->co_manager_mutex, rc, rc+1 ) ) return PARSEC_HOOK_RETURN_ASYNC; } @@ -1358,14 +1381,36 @@ parsec_cuda_migrate_manager( parsec_execution_stream_t *es, * @brief The migrate_manager thread exits when there are no more * work to be done. */ - while( gpu_device->mutex > 0) + while( gpu_device->mutex > 0 || gpu_device->complete_mutex > 0) { nb_migrated = migrate_to_starving_device(es, gpu_device); if( nb_migrated > 0 ) { rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); } + + if(gpu_device->complete_mutex > 0) + { + + /** try completion PARSEC_MAX_EVENTS_PER_STREAM tasks */ + for( i = 0; i < PARSEC_MAX_EVENTS_PER_STREAM; i++) + { + task = NULL; + task = (parsec_task_t*)parsec_fifo_pop( &(gpu_device->to_complete) ); + if( task != NULL) + { + __parsec_complete_execution( es, task ); + parsec_atomic_fetch_dec_int32( &(gpu_device->complete_mutex) ); + } + + if( gpu_device->complete_mutex == 0 ) + { + break; + } + } + } + } - rc = parsec_atomic_fetch_dec_int32( &(gpu_device->migrate_manager_mutex) ); + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->co_manager_mutex) ); return PARSEC_HOOK_RETURN_ASYNC; } \ No newline at end of file diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index 7cc5c3e4e..95ebb8bab 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -54,6 +54,8 @@ typedef struct parsec_device_cuda_info_s int evictions; int nb_stage_in; int nb_stage_in_req; + int completed_manager; + int completed_co_manager; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -143,6 +145,9 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d int 
parsec_cuda_inc_eviction_count(int device_index); int parsec_cuda_inc_stage_in_count(int device); int parsec_cuda_inc_stage_in_req_count(int device); -parsec_hook_return_t parsec_cuda_migrate_manager( parsec_execution_stream_t *es, +parsec_hook_return_t parsec_cuda_co_manager( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device ); +int inc_manager_complete_count(int device_index); +int inc_co_manager_complete_count(int device_index); + #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 8205f6e5b..f42a59d79 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -377,7 +377,8 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) if(-1 == len) { gpu_device->super.name = NULL; goto release_device; } gpu_device->data_avail_epoch = 0; gpu_device->mutex = 0; - gpu_device->migrate_manager_mutex = 0; + gpu_device->complete_mutex = 0; + gpu_device->co_manager_mutex = 0; gpu_device->max_exec_streams = parsec_cuda_max_streams; gpu_device->exec_stream = @@ -494,6 +495,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); + PARSEC_OBJ_CONSTRUCT(&gpu_device->to_complete, parsec_fifo_t); gpu_device->sort_starting_p = NULL; gpu_device->peer_access_mask = 0; /* No GPU to GPU direct transfer by default */ @@ -592,6 +594,8 @@ parsec_cuda_module_fini(parsec_device_module_t* device) /* Release pending queue */ PARSEC_OBJ_DESTRUCT(&gpu_device->pending); + PARSEC_OBJ_DESTRUCT(&gpu_device->to_complete); + /* Release all streams */ for( j = 0; j < gpu_device->num_exec_streams; j++ ) { @@ -2954,18 +2958,20 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, * @brief * parsec_cuda_migrate_tasks == 0 : no task 
migration * parsec_cuda_migrate_tasks == 1 : task will be migrated by the manager thread - * parsec_cuda_migrate_tasks == 2 : task will be migrate a different thread + * parsec_cuda_migrate_tasks == 2 : task will be migrate by a co-manager * + * parsec_cuda_delegate_task_completion == 0: the manager will complete the tasks + * parsec_cuda_delegate_task_completion == 1: the co-manager will complete the tasks */ - if(parsec_cuda_migrate_tasks == 2) + if(parsec_cuda_migrate_tasks == 2 || parsec_cuda_delegate_task_completion == 1) { /** * @brief 'rc1 == 1' is important or the manager thread will transition - * to migrate manager. 'migrate_manager_mutex == 0' will ensure that there + * to co-manager. 'co_manager_mutex == 0' will ensure that there * is only one migrate manager per device. */ - if( rc1 == 1 && gpu_device->migrate_manager_mutex == 0 ) - parsec_cuda_migrate_manager(es, gpu_device); + if( rc1 == 1 && gpu_device->co_manager_mutex == 0 ) + parsec_cuda_co_manager(es, gpu_device); } return PARSEC_HOOK_RETURN_ASYNC; @@ -3148,14 +3154,30 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_cuda_kernel_epilog( gpu_device, gpu_task ); gpu_device->super.executed_tasks++; + /** The manager will complete the tasks */ if( parsec_cuda_delegate_task_completion == 0 ) + { __parsec_complete_execution( es, gpu_task->ec ); + + if(parsec_migrate_statistics) + inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + } + /** The co-manager will complete the task. 
But first check if such a manager is active */ + else if ( gpu_device->co_manager_mutex > 0 ) + { + parsec_atomic_fetch_inc_int32( &(gpu_device->complete_mutex) ); + parsec_fifo_push( &(gpu_device->to_complete), (parsec_list_item_t*)gpu_task->ec ); + + if(parsec_migrate_statistics) + inc_co_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + } + /** If the co-manager is not yet ready */ else { - gpu_task->ec->priority = INT32_MAX; /* Assign maximum priority */ - gpu_task->ec->status = PARSEC_TASK_STATUS_COMPLETE; - PARSEC_LIST_ITEM_SINGLETON(gpu_task->ec); - __parsec_schedule(es, (parsec_task_t *)gpu_task->ec, 0); + __parsec_complete_execution( es, gpu_task->ec ); + + if(parsec_migrate_statistics) + inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); } #if defined(PARSEC_PROF_TRACE) @@ -3246,6 +3268,23 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Leaving GPU management at %s:%d", gpu_device->super.name, __FILE__, __LINE__); + + /** manager has nothing else to do. 
So it can complete the tasks offloaded to the co-manager */ + parsec_task_t *task = NULL; + while( gpu_device->complete_mutex > 0) + { + task = NULL; + task = (parsec_task_t*)parsec_fifo_pop( &(gpu_device->to_complete) ); + if( task != NULL) + { + __parsec_complete_execution( es, task ); + parsec_atomic_fetch_dec_int32( &(gpu_device->complete_mutex) ); + + if(parsec_migrate_statistics) + inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + } + } + return PARSEC_HOOK_RETURN_ASYNC; } gpu_task = progress_task; @@ -3292,37 +3331,4 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } -parsec_hook_return_t -parsec_cuda_migrate_manager( parsec_execution_stream_t *es, - parsec_device_gpu_module_t* gpu_device ) -{ - int rc = 0, nb_migrated = 0; - - (void)es; - - if( gpu_device->migrate_manager_mutex > 0 ) - return PARSEC_HOOK_RETURN_ASYNC; - else - { - rc = gpu_device->migrate_manager_mutex; - if( !parsec_atomic_cas_int32( &gpu_device->migrate_manager_mutex, rc, rc+1 ) ) - return PARSEC_HOOK_RETURN_ASYNC; - } - - /** - * @brief The migrate_manager thread exits when there are no more - * work to be done. - */ - while( gpu_device->mutex > 0) - { - nb_migrated = migrate_to_starving_device(es, gpu_device); - if( nb_migrated > 0 ) - { - rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); - } - } - rc = parsec_atomic_fetch_dec_int32( &(gpu_device->migrate_manager_mutex) ); - return PARSEC_HOOK_RETURN_ASYNC; -} - #endif /* PARSEC_HAVE_CUDA */ diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index e93d3ffd4..33c18d78e 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -149,6 +149,7 @@ struct parsec_device_gpu_module_s { * the index of the set bit device. */ volatile int32_t mutex; + volatile int32_t complete_mutex; uint64_t data_avail_epoch; /**< Identifies the epoch of the data status on the devide. 
It * is increased every time a new data is made available, so * that we know which tasks can be evaluated for submission. @@ -156,12 +157,13 @@ struct parsec_device_gpu_module_s { parsec_list_t gpu_mem_lru; /* Read-only blocks, and fresh blocks */ parsec_list_t gpu_mem_owned_lru; /* Dirty blocks */ parsec_fifo_t pending; + parsec_fifo_t to_complete; struct zone_malloc_s *memory; parsec_list_item_t *sort_starting_p; parsec_gpu_exec_stream_t **exec_stream; size_t mem_block_size; int64_t mem_nb_blocks; - volatile int32_t migrate_manager_mutex; + volatile int32_t co_manager_mutex; }; struct parsec_gpu_exec_stream_s { From b04f102307c688dd5e929ca670c4b42f1ca25b28 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 28 Nov 2022 05:47:01 -0500 Subject: [PATCH 201/215] Code cleanup. There is no need to track task count at every stage. --- parsec/mca/device/cuda/device_cuda_migrate.c | 3 --- parsec/mca/device/cuda/device_cuda_module.c | 25 +------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index c5aae2f1b..9db55c3ec 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -431,17 +431,14 @@ int set_migrate_status(parsec_device_gpu_module_t *dealer_device, parsec_device_ { if (execution_level == 0) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 0); device_info[dealer_device_index].level0++; } if (execution_level == 1) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 1); device_info[dealer_device_index].level1++; } if (execution_level == 2) { - parsec_cuda_set_device_task(dealer_device_index, /* count */ -1, /* level */ 2); device_info[dealer_device_index].level2++; } } diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index f42a59d79..646485be7 100644 --- 
a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2175,25 +2175,7 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, if( NULL != task ) { PARSEC_PUSH_TASK(stream->fifo_pending, (parsec_list_item_t*)task); - if(parsec_migrate_statistics) - { - if (task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) - { - if(stream == gpu_device->exec_stream[0]) - { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 0); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 1); - } - else if(stream != gpu_device->exec_stream[1] && stream != gpu_device->exec_stream[0]) - { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 1); - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 2); - } - } - } - task = NULL; - } *out_task = NULL; progress_fct = upstream_progress_fct; @@ -2912,9 +2894,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, } } - if (parsec_migrate_statistics && (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL)) - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ 1, /* level */ 0); // increment task count for this device - // keep track of compute task count. Increment compute task count. 
if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) inc_compute_task_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); @@ -3241,9 +3220,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, #endif if (parsec_migrate_statistics) - { - parsec_cuda_set_device_task(CUDA_DEVICE_NUM(gpu_device->super.device_index), /* count */ -1, /* level */ 2); - + { if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) inc_compute_tasks_executed(CUDA_DEVICE_NUM(gpu_device->super.device_index)); } From bbb929da82eba844020b80e4f3bdfb2ff8ec1662 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 5 Dec 2022 17:54:41 -0500 Subject: [PATCH 202/215] parsec_cuda_co_manager corrected --- parsec/mca/device/cuda/device_cuda_migrate.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9db55c3ec..6440e5e11 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1380,10 +1380,13 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, */ while( gpu_device->mutex > 0 || gpu_device->complete_mutex > 0) { - nb_migrated = migrate_to_starving_device(es, gpu_device); - if( nb_migrated > 0 ) + if(parsec_cuda_migrate_tasks == 2) { - rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); + nb_migrated = migrate_to_starving_device(es, gpu_device); + if( nb_migrated > 0 ) + { + rc = parsec_atomic_fetch_add_int32(&(gpu_device->mutex), -1 * nb_migrated); + } } if(gpu_device->complete_mutex > 0) From 4b98c2afc2433c6a59414a7abc509e6e2bb29646 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 7 Dec 2022 17:27:13 -0500 Subject: [PATCH 203/215] Minor code changes --- parsec/data.c | 10 +++++++--- parsec/mca/device/cuda/device_cuda_migrate.c | 9 +++++++++ parsec/mca/device/cuda/device_cuda_module.c | 21 +++++++++++--------- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git 
a/parsec/data.c b/parsec/data.c index 56667792e..ac58f67ed 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -207,13 +207,17 @@ int parsec_data_copy_detach(parsec_data_t* data, { for( i = 0; i < parsec_nb_devices; i++ ) { - if( i == device) continue; - if( NULL == data->device_copies[i] ) continue; + if( i == device || NULL == data->device_copies[i] + || PARSEC_DATA_COHERENCY_INVALID == data->device_copies[i]->coherency_state) + { + continue; + } if( data->device_copies[i]->version < copy->version) { younger_version = i; continue; - } + } + data->owner_device = data->device_copies[i]->device_index; new_owner_copy = data->device_copies[i]; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 6440e5e11..588a77768 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -8,6 +8,7 @@ extern int parsec_cuda_migrate_tasks; extern int parsec_cuda_migrate_chunk_size; // chunks of task migrated to a device (default=5) extern int parsec_cuda_migrate_task_selection; // method of task selection (default == single_pass_selection) extern int parsec_cuda_delegate_task_completion; // task completion delegation +extern int parsec_cuda_iterative; parsec_device_cuda_info_t *device_info; static parsec_list_t *migrated_task_list; // list of all migrated task @@ -168,6 +169,7 @@ int parsec_cuda_migrate_fini() } printf("\n *********** SUMMARY *********** \n"); + printf("Total devices : %d \n", NDEVICES); printf("Total tasks executed : %d \n", summary_total_tasks_executed); printf("Total compute tasks executed : %d \n", summary_total_compute_tasks_executed); printf("Perc of compute tasks : %lf \n", ((float)summary_total_compute_tasks_executed / summary_total_tasks_executed) * 100); @@ -221,6 +223,12 @@ int parsec_cuda_migrate_fini() else printf("Migration : delegated \n"); + if(parsec_cuda_iterative) + printf("Iterative task mapping : yes \n"); + else + printf("Iterative task 
mapping : no \n"); + + printf("\n---------Execution time = %ld ns ( %lf s)------------ \n", time_stamp(), (double)time_stamp() / 1000000000); } @@ -1119,6 +1127,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); PARSEC_OBJ_RELEASE(task->data[i].data_out); zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); + task->data[i].data_out->device_private = NULL; } parsec_atomic_unlock(&original->lock); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 646485be7..0151d5597 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -1360,6 +1360,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_data_copy_t* in_elem = task_data->data_in; parsec_data_t* original = in_elem->original; parsec_gpu_data_copy_t* gpu_elem = task_data->data_out; + parsec_data_copy_t *candidate = NULL; parsec_device_cuda_module_t *in_elem_dev = NULL; uint32_t nb_elts = gpu_task->flow_nb_elts[flow->flow_index]; int transfer_from = -1; @@ -1412,7 +1413,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, * it means that we have already identifies a staged_in data as the possible * candidate. So we can directly use that data for D2D transfer. */ - parsec_data_copy_t *candidate = gpu_task->candidate[flow->flow_index]; + candidate = gpu_task->candidate[flow->flow_index]; parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(candidate->device_index); assert(PARSEC_DEV_CUDA == target->super.super.type && candidate != NULL ); @@ -1430,9 +1431,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( /* Check for NULL is important. 
Otherwise, in cases where the flow is a NEW on GPU it will cause problems */ gpu_task->original_data_in[ flow->flow_index ] != NULL && gpu_task->original_data_in[ flow->flow_index ] != task_data->data_in) - { - PARSEC_OBJ_RELEASE(task_data->data_in); - } + { + PARSEC_OBJ_RELEASE(task_data->data_in); + assert(task_data->data_in != candidate); + } /** * if the data was already staged_in then we would have already incremented @@ -1464,7 +1466,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [readers %d, ref_count %d] on CUDA device %d is the best candidate (case 1) to Device to Device copy", gpu_device->super.name, in_elem, in_elem->readers, in_elem->super.super.obj_reference_count, in_elem_dev->cuda_index); - // Remember the original data_in. + /** Remember the original data_in. */ assert( gpu_task->original_data_in[ flow->flow_index ] == NULL); gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; @@ -1476,7 +1478,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_device_cuda_module_t *target = (parsec_device_cuda_module_t*)parsec_mca_device_get(t); if( PARSEC_DEV_CUDA != target->super.super.type ) continue; if(gpu_device->peer_access_mask & (1 << target->cuda_index)) { - parsec_data_copy_t *candidate = original->device_copies[t]; + candidate = original->device_copies[t]; if( (NULL != candidate && candidate->version == in_elem->version )) { @@ -1499,7 +1501,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:\tData copy %p [ref_count %d] on CUDA device %d is the best candidate (case 3) to Device to Device copy, increasing its readers to %d", gpu_device->super.name, candidate, candidate->super.super.obj_reference_count, target->cuda_index, candidate->readers+1); - // Remember the original data_in. + /** Remember the original data_in. 
*/ assert( gpu_task->original_data_in[ flow->flow_index ] == NULL); gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; @@ -1538,7 +1540,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, transfer_from = parsec_data_start_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); if( PARSEC_FLOW_ACCESS_WRITE & type && gpu_task->task_type != PARSEC_GPU_TASK_TYPE_PREFETCH ) { - //gpu_elem->version++; /* on to the next version */ gpu_elem->version = in_elem->version; PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%s]: GPU copy %p [ref_count %d] increments version to %d at %s:%d", @@ -1554,10 +1555,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, "New tile created original %p "); } - /* Do not need to be tranferred */ if (parsec_migrate_statistics) parsec_cuda_inc_stage_in_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); + /* Do not need to be tranferred */ if( -1 == transfer_from ) { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; @@ -2163,7 +2164,9 @@ progress_stream( parsec_device_gpu_module_t* gpu_device, { parsec_advance_task_function_t progress_fct; int saved_rc = 0, rc; +#if defined(PARSEC_PROF_TRACE) float event_duration = 0; +#endif #if defined(PARSEC_DEBUG_NOISIER) char task_str[MAX_TASK_STRLEN]; #endif From 3903a6b2e80ef941efce5810f2ef0c84a85a823b Mon Sep 17 00:00:00 2001 From: Joseph John Date: Thu, 15 Dec 2022 19:54:23 -0500 Subject: [PATCH 204/215] When task completion was delegated we were releasing data before the parsec_complete() was called. This was corrected. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 140 +++++++++---------- parsec/mca/device/cuda/device_cuda_module.c | 69 ++++----- 2 files changed, 98 insertions(+), 111 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 588a77768..f88d74eee 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -716,6 +716,7 @@ int single_pass_selection(parsec_execution_stream_t *es, parsec_device_gpu_modul else break; } + } (void)es; @@ -1036,77 +1037,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t *original = task->data[i].data_out->original; parsec_atomic_lock(&original->lock); - - assert(original->device_copies[dealer_device->super.device_index] != NULL); - assert(original->device_copies[dealer_device->super.device_index] == task->data[i].data_out); - assert(task->data[i].data_out->device_index == dealer_device->super.device_index); - - /** - * Even if the task has only read access, the data may have been modified - * by another task, and it may be 'dirty'. We check the version of the data - * to verify if it is dirty. If it is, then it is pushed to gpu_mem_owned_lru, - * if not is is pused to gpu_mem_lru. - */ - if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) - { - assert(task->data[i].data_out->readers > 0); - /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. 
- */ - gpu_task->candidate[i] = task->data[i].data_out; - - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || - task->data[i].data_out->version > task->data[i].data_in->version) - { - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); - } - else - { - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - } - } - /** - * If the task has only read-write access, the data may have been modified - * by another task, and it may be 'dirty'. We check the version of the data - * to verify if it is dirty. If it is, then it is pushed to gpu_mem_owned_lru, - * if not is is pused to gpu_mem_lru. - */ - if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) - { - assert(task->data[i].data_out->readers > 0); - assert(original->device_copies[0] != NULL); - assert(task->data[i].data_in == original->device_copies[0]); - /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. 
- */ - gpu_task->candidate[i] = task->data[i].data_out; - - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - - if (original->device_copies[0] == NULL || task->data[i].data_out->version > original->device_copies[0]->version || - task->data[i].data_out->version > task->data[i].data_in->version) - { - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_OWNED; - parsec_list_push_back(&dealer_device->gpu_mem_owned_lru, (parsec_list_item_t *)task->data[i].data_out); - } - else - { - task->data[i].data_out->coherency_state = PARSEC_DATA_COHERENCY_SHARED; - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - } - } + /** * If the flow is write only, we free the data immediatly as this data should never * be written back. As the data_in of a write only flow is always CPU copy we revert @@ -1120,23 +1051,50 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[0] != NULL); assert(task->data[i].data_in == original->device_copies[0]); + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "Migrate: data %p attached to original %p [readers %d, ref_count %d] freed from device %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); - PARSEC_OBJ_RELEASE(task->data[i].data_out); + zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); task->data[i].data_out->device_private = NULL; + 
PARSEC_OBJ_RELEASE(task->data[i].data_out); + + /** The data in used in the first stage-in may have been released. + * But we store the original data_in and that can bes used for the stage_in. + * As the flow is write-only we dont care about the version of the data. + */ + assert(gpu_task->original_data_in[i] != NULL); + task->data[i].data_in = gpu_task->original_data_in[i]; + } + else + { + assert(task->data[i].data_out->readers > 0); + /** + * we set a possible candidate for this flow of the task. This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. + */ + gpu_task->candidate[i] = task->data[i].data_out; - parsec_atomic_unlock(&original->lock); + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + } + + parsec_atomic_unlock(&original->lock); } } @@ -1371,6 +1329,10 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, { int rc = 0, nb_migrated = 0, i = 0; parsec_task_t* task = NULL; + parsec_gpu_task_t *gpu_task = NULL; + + parsec_list_t *gpu_tasks_to_free = NULL; + gpu_tasks_to_free = PARSEC_OBJ_NEW(parsec_list_t); (void)es; @@ -1404,12 +1366,24 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, /** try completion PARSEC_MAX_EVENTS_PER_STREAM tasks */ for( i = 0; i < PARSEC_MAX_EVENTS_PER_STREAM; i++) { + gpu_task = NULL; task = NULL; - task = 
(parsec_task_t*)parsec_fifo_pop( &(gpu_device->to_complete) ); - if( task != NULL) + + gpu_task = (parsec_gpu_task_t*)parsec_fifo_pop( &(gpu_device->to_complete) ); + if( gpu_task != NULL) { + task = gpu_task->ec; __parsec_complete_execution( es, task ); parsec_atomic_fetch_dec_int32( &(gpu_device->complete_mutex) ); + + int f = 0; + for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) + { + if( gpu_task->original_data_in[f] != NULL ) + PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } + + parsec_list_push_back(gpu_tasks_to_free, (parsec_list_item_t*)gpu_task); } if( gpu_device->complete_mutex == 0 ) @@ -1420,6 +1394,18 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, } } + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->co_manager_mutex) ); + + /** We free the task delegated to the co-manager only at the end. Or else it may + * interfere with some operations in the manager. + */ + while(NULL != (gpu_task = (parsec_gpu_task_t*)parsec_list_pop_front(gpu_tasks_to_free)) ) + { + free(gpu_task); + } + + PARSEC_OBJ_RELEASE(gpu_tasks_to_free); + return PARSEC_HOOK_RETURN_ASYNC; } \ No newline at end of file diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 0151d5597..e56a10496 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -2856,6 +2856,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, cudaError_t status; int rc, rc1, exec_stream = 0, nb_migrated = 0; parsec_gpu_task_t *progress_task, *out_task_submit = NULL, *out_task_pop = NULL; + int manager_completing_task = 0; #if defined(PARSEC_DEBUG_NOISIER) char tmp[MAX_TASK_STRLEN]; #endif @@ -3041,6 +3042,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, __parsec_reschedule(es, progress_task->ec); gpu_task = progress_task; progress_task = NULL; + manager_completing_task = 1; goto remove_gpu_task; } gpu_task = NULL; @@ -3130,6 +3132,7 @@ 
parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_D2D_COMPLETE) { free( gpu_task->ec ); gpu_task->ec = NULL; + manager_completing_task = 1; goto remove_gpu_task; } @@ -3140,6 +3143,22 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, if( parsec_cuda_delegate_task_completion == 0 ) { __parsec_complete_execution( es, gpu_task->ec ); + manager_completing_task = 1; + + /** + * @brief For tasks migrated after stage_ in, during the first stage_in + * we would have increased the refcount of the data_in. If the task was not + * migrated, then the generated code would have decremented this refcount after + * the task was executed. But now as we have migrated the task, this decrement + * will not happen. Here we RELEASE the remembered data_in of the task. + */ + + int f = 0; + for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) + { + if( gpu_task->original_data_in[f] != NULL ) + PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } if(parsec_migrate_statistics) inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); @@ -3148,7 +3167,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, else if ( gpu_device->co_manager_mutex > 0 ) { parsec_atomic_fetch_inc_int32( &(gpu_device->complete_mutex) ); - parsec_fifo_push( &(gpu_device->to_complete), (parsec_list_item_t*)gpu_task->ec ); + parsec_fifo_push( &(gpu_device->to_complete), (parsec_list_item_t*)gpu_task ); + manager_completing_task = 0; if(parsec_migrate_statistics) inc_co_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); @@ -3157,6 +3177,14 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, else { __parsec_complete_execution( es, gpu_task->ec ); + manager_completing_task = 1; + + int f = 0; + for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) + { + if( gpu_task->original_data_in[f] != NULL ) + PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } 
if(parsec_migrate_statistics) inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); @@ -3167,20 +3195,6 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->complete_time = time_stamp(); #endif - /** - * @brief For tasks migrated after stage_ in, during the first stage_in - * we would have increased the refcount of the data_in. If the task was not - * migrated, then the generated code would have decremented this refcount after - * the task was executed. But now as we have migrated the task, this decrement - * will not happen. Here we RELEASE the remembered data_in of the task. - */ - - int f = 0; - for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) - { - if( gpu_task->original_data_in[f] != NULL ) - PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); - } #if defined(PARSEC_PROF_TRACE) if( gpu_task != NULL ) @@ -3213,7 +3227,7 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; prof_info.clock_speed = gpu_task->clock_speed; prof_info.class_id = gpu_task->ec->task_class->task_class_id; - prof_info.exec_stream_index = gpu_task->exec_stream_index; + prof_info.exec_stream_index = gpu_task->exec_stream_index; parsec_profiling_trace_flags(es->es_profile, parsec_gpu_task_count_end, @@ -3237,7 +3251,11 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, parsec_device_load[gpu_device->super.device_index] -= parsec_device_sweight[gpu_device->super.device_index]; PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream,"GPU[%s]: gpu_task %p freed at %s:%d", gpu_device->super.name, gpu_task, __FILE__, __LINE__); - free( gpu_task ); + + /* free the task only if the manager is completing the task*/ + if(manager_completing_task == 1) + free( gpu_task ); + rc = parsec_atomic_fetch_dec_int32( &(gpu_device->mutex) ); if( 1 == rc ) { /* I was the last one */ #if defined(PARSEC_PROF_TRACE) @@ -3248,23 +3266,6 @@ parsec_cuda_kernel_scheduler( 
parsec_execution_stream_t *es, PARSEC_DEBUG_VERBOSE(2, parsec_gpu_output_stream,"GPU[%s]: Leaving GPU management at %s:%d", gpu_device->super.name, __FILE__, __LINE__); - - /** manager has nothing else to do. So it can complete the tasks offloaded to the co-manager */ - parsec_task_t *task = NULL; - while( gpu_device->complete_mutex > 0) - { - task = NULL; - task = (parsec_task_t*)parsec_fifo_pop( &(gpu_device->to_complete) ); - if( task != NULL) - { - __parsec_complete_execution( es, task ); - parsec_atomic_fetch_dec_int32( &(gpu_device->complete_mutex) ); - - if(parsec_migrate_statistics) - inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); - } - } - return PARSEC_HOOK_RETURN_ASYNC; } gpu_task = progress_task; From f4f855c8804b86899416609100f5a5fdcad9b8b6 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 20 Dec 2022 01:22:49 -0500 Subject: [PATCH 205/215] Statistics updated to count the number of tasks mapped for iterative applications. --- parsec/mca/device/cuda/device_cuda_migrate.c | 15 ++++++++ parsec/mca/device/cuda/device_cuda_migrate.h | 2 + parsec/mca/device/cuda/device_cuda_module.c | 39 +++++++++++++++----- parsec/mca/device/device.c | 9 ++++- parsec/mca/device/device_gpu.h | 1 + parsec/mca/device/transfer_gpu.c | 5 ++- 6 files changed, 59 insertions(+), 12 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index f88d74eee..9fd5f9b60 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -83,6 +83,7 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].nb_stage_in_req = 0; device_info[i].completed_co_manager = 0; device_info[i].completed_manager = 0; + device_info[i].iterative_mapped = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -110,6 +111,7 @@ int parsec_cuda_migrate_fini() float summary_avg_task_migrated = 0, summary_deal_success_perc = 0, 
summary_avg_task_migrated_per_sucess = 0; int summary_total_evictions = 0, summary_total_stage_in = 0, summary_total_stage_in_req = 0; int summary_completed_manager = 0, summary_completed_co_manager = 0; + int summary_iterative_mapped = 0; #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); @@ -137,6 +139,7 @@ int parsec_cuda_migrate_fini() summary_total_stage_in_req += device_info[i].nb_stage_in_req; summary_completed_manager += device_info[i].completed_manager; summary_completed_co_manager += device_info[i].completed_co_manager; + summary_iterative_mapped += device_info[i].iterative_mapped; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); @@ -166,6 +169,8 @@ int parsec_cuda_migrate_fini() printf("Perc eviction for stage in required : %lf \n", (((float)device_info[i].evictions / device_info[i].nb_stage_in_req) * 100 )); printf("Tasks completed by manager : %d \n", device_info[i].completed_manager); printf("Tasks completed by co-manager : %d \n", device_info[i].completed_co_manager); + printf("Tasks mapped iteratively : %d \n", device_info[i].iterative_mapped); + } printf("\n *********** SUMMARY *********** \n"); @@ -197,6 +202,8 @@ int parsec_cuda_migrate_fini() printf("Tasks completed by manager : %d \n", summary_completed_manager); printf("Tasks completed by co-manager : %d \n", summary_completed_co_manager); + printf("Tasks mapped iteratively : %d \n", summary_iterative_mapped); + @@ -1232,6 +1239,14 @@ int inc_co_manager_complete_count(int device_index) return device_info[device_index].completed_co_manager; } +int inc_iterative_mapped_count(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].iterative_mapped); + return device_info[device_index].iterative_mapped; +} + + + int find_task_affinity_to_starving_node(parsec_gpu_task_t *gpu_task, int device_index, int status) { int i; diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h 
b/parsec/mca/device/cuda/device_cuda_migrate.h index 95ebb8bab..dbba942ac 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -56,6 +56,7 @@ typedef struct parsec_device_cuda_info_s int nb_stage_in_req; int completed_manager; int completed_co_manager; + int iterative_mapped; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -149,5 +150,6 @@ parsec_hook_return_t parsec_cuda_co_manager( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device ); int inc_manager_complete_count(int device_index); int inc_co_manager_complete_count(int device_index); +int inc_iterative_mapped_count(int device_index); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index e56a10496..268e0d68c 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -494,6 +494,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) /* Initialize internal lists */ PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); + PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_tmp_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->to_complete, parsec_fifo_t); @@ -645,6 +646,7 @@ parsec_cuda_module_fini(parsec_device_module_t* device) /* Cleanup the GPU memory. 
*/ PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); + PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_tmp_lru); return PARSEC_SUCCESS; } @@ -737,8 +739,10 @@ parsec_cuda_memory_reserve( parsec_device_cuda_module_t* cuda_device, "GPU[%s] Retain and insert CUDA copy %p attached to original %p on device_index %d [readers %d, ref_count %d] in LRU", gpu_device->super.name, gpu_elem, gpu_elem->original, gpu_elem->device_index, gpu_elem->readers, gpu_elem->super.super.obj_reference_count); - // assert( gpu_elem->super.super.obj_reference_count == 1); - assert(gpu_copy->device_index == gpu_device->super.device_index); + + assert(gpu_copy->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)gpu_elem); + PARSEC_LIST_ITEM_SINGLETON(gpu_elem); parsec_list_push_back( &gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem ); cudaMemGetInfo( &free_mem, &total_mem ); } @@ -874,6 +878,13 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) /* Free all memory on GPU */ parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); + + if( !parsec_list_is_empty(&gpu_device->gpu_mem_tmp_lru) ) + { + printf("LIST is not empty \n"); + } + + #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) && !defined(_NDEBUG) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", @@ -1024,6 +1035,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, temp_loc[j], temp_loc[j]->super.super.obj_reference_count); /* push them at the head to reach them again at the next iteration */ assert(temp_loc[j]->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)temp_loc[j]); + PARSEC_LIST_ITEM_SINGLETON(temp_loc[j]); parsec_list_push_front(&gpu_device->gpu_mem_lru, 
(parsec_list_item_t*)temp_loc[j]); } #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) @@ -1036,6 +1049,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, return PARSEC_HOOK_RETURN_NEXT; } + parsec_list_item_ring_chop((parsec_list_item_t*)lru_gpu_elem); PARSEC_LIST_ITEM_SINGLETON(lru_gpu_elem); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:%s: Evaluate LRU-retrieved CUDA copy %p [ref_count %d] original %p", @@ -1053,6 +1067,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:%s: Drop LRU-retrieved CUDA copy %p [readers %d, ref_count %d] original %p", gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); + goto find_another_data; // TODO: add an assert of some sort to check for leaks here? } /* It's also possible that the ref_count of that element is bigger than 1 @@ -1073,6 +1088,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); assert(lru_gpu_elem->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)&lru_gpu_elem->super); + PARSEC_LIST_ITEM_SINGLETON(&lru_gpu_elem->super); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1101,6 +1118,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, * need to protect all accesses to gpu_mem_lru with the locked version */ assert(0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); assert(lru_gpu_elem->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)&lru_gpu_elem->super); + 
PARSEC_LIST_ITEM_SINGLETON(&lru_gpu_elem->super); parsec_list_push_back(&gpu_device->gpu_mem_lru, &lru_gpu_elem->super); if( NULL == gpu_mem_lru_cycling ) { gpu_mem_lru_cycling = lru_gpu_elem; @@ -1147,6 +1166,7 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, } int do_unlock = oldmaster->super.obj_reference_count != 1; parsec_data_copy_detach(oldmaster, lru_gpu_elem, gpu_device->super.device_index); + if( do_unlock ) parsec_atomic_unlock( &oldmaster->lock ); assert(lru_gpu_elem->readers == 0); @@ -1247,6 +1267,8 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count); assert(0 != (gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); assert(gpu_elem->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)gpu_elem); + PARSEC_LIST_ITEM_SINGLETON(gpu_elem); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_elem); parsec_atomic_unlock(&master->lock); } @@ -2072,6 +2094,7 @@ parsec_gpu_callback_complete_push(parsec_device_gpu_module_t *gpu_device, parsec_list_push_back(&src_device->gpu_mem_lru, (parsec_list_item_t*)task->data[i].data_in); src_device->data_avail_epoch++; } + parsec_atomic_unlock( &task->data[i].data_in->original->lock ); /* Notify any waiting thread that we're done messing with that device structure */ rc = parsec_atomic_cas_int32(&src_device->mutex, -1, 0); (void)rc; @@ -2581,6 +2604,7 @@ parsec_cuda_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_unlock(&original->lock); continue; /* done with this element, go for the next one */ } + PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%s]:\tread copy %p [ref_count %d] on flow %s has readers (%i)", gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name, gpu_copy->readers); @@ -2690,13 +2714,6 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, /* Don't 
bother if there is no real data (aka. CTL or no output) */ if(NULL == this_task->data[i].data_out) continue; - //if(0 != this_task->data[i].data_in->device_index) - //{ - // assert( this_task->data[i].data_in->original->device_copies[0] != NULL); - // this_task->data[i].data_in = this_task->data[i].data_in->original->device_copies[0]; - //} - - if( !(gpu_task->flow[i]->flow_flags & PARSEC_FLOW_ACCESS_WRITE) ) { /* Warning data_out for read only flows has been overwritten in pop */ continue; @@ -2758,6 +2775,8 @@ parsec_cuda_kernel_epilog( parsec_device_gpu_module_t *gpu_device, PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "CUDA copy %p [ref_count %d] moved to the owned LRU in %s", gpu_copy, gpu_copy->super.super.obj_reference_count, __func__); + parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); + PARSEC_LIST_ITEM_SINGLETON(gpu_copy); parsec_list_push_back(&gpu_device->gpu_mem_owned_lru, (parsec_list_item_t*)gpu_copy); } } @@ -2825,6 +2844,8 @@ parsec_cuda_kernel_cleanout( parsec_device_gpu_module_t *gpu_device, this_task->data[i].data_out = cpu_copy; if( 0 != (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) { assert(gpu_copy->device_index == gpu_device->super.device_index); + parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); + PARSEC_LIST_ITEM_SINGLETON(gpu_copy); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } parsec_atomic_unlock(&original->lock); diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index d445d8186..22e128064 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -48,6 +48,7 @@ static mca_base_component_t **device_components = NULL; extern int parsec_cuda_iterative; extern int parsec_cuda_unfair_mapping; +extern int parsec_migrate_statistics; /** * Temporary solution: Use the following two arrays to taskpool the weight and @@ -89,7 +90,13 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) { // if task to device mapping is already 
available use that dev_index = find_task_to_device_mapping(this_task); - if(dev_index != -1) return dev_index; + if(dev_index != -1) + { + if(parsec_migrate_statistics) + inc_iterative_mapped_count( CUDA_DEVICE_NUM(dev_index) ); + + return dev_index; + } } /* Select the location of the first data that is used in READ/WRITE or pick the diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 33c18d78e..ce605c965 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -156,6 +156,7 @@ struct parsec_device_gpu_module_s { */ parsec_list_t gpu_mem_lru; /* Read-only blocks, and fresh blocks */ parsec_list_t gpu_mem_owned_lru; /* Dirty blocks */ + parsec_list_t gpu_mem_tmp_lru; /* Dirty blocks */ parsec_fifo_t pending; parsec_fifo_t to_complete; struct zone_malloc_s *memory; diff --git a/parsec/mca/device/transfer_gpu.c b/parsec/mca/device/transfer_gpu.c index 775199b08..925b23269 100644 --- a/parsec/mca/device/transfer_gpu.c +++ b/parsec/mca/device/transfer_gpu.c @@ -243,7 +243,6 @@ parsec_gpu_create_w2r_task(parsec_device_gpu_module_t *gpu_device, } parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); PARSEC_LIST_ITEM_SINGLETON(gpu_copy); - //gpu_copy->readers++; PARSEC_DATA_COPY_INC_READERS(gpu_copy); d2h_task->data[nb_cleaned].data_out = gpu_copy; gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_UNDER_TRANSFER; /* mark the copy as in transfer */ @@ -296,7 +295,6 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, for( int i = 0; i < task->locals[0].value; i++ ) { gpu_copy = task->data[i].data_out; parsec_atomic_lock(&gpu_copy->original->lock); - //gpu_copy->readers--; PARSEC_DATA_COPY_DEC_READERS(gpu_copy); gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; gpu_device->super.data_out_to_host += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ @@ -311,6 +309,7 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, 
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "D2H[%s] task %p:%i GPU data copy %p [%p] has a backup in memory", gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); + printf("parsec_gpu_complete_w2r_task \n"); } else { gpu_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; cpu_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; @@ -323,6 +322,8 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "D2H[%s] task %p:%i GPU data copy %p [%p] now available", gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); + parsec_list_item_ring_chop((parsec_list_item_t*)gpu_copy); + PARSEC_LIST_ITEM_SINGLETON(gpu_copy); parsec_list_push_back(&gpu_device->gpu_mem_lru, (parsec_list_item_t*)gpu_copy); } parsec_atomic_unlock(&gpu_copy->original->lock); From 0a2fcab68776a7eba89f1bbf101e65323986a904 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 20 Dec 2022 04:46:46 -0500 Subject: [PATCH 206/215] More options added for iterative applications. 
parsec_cuda_iterative = 1 maps only migrated tasks while parsec_cuda_iterative = 2 maps all tasks --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 ++++-- parsec/mca/device/cuda/device_cuda_module.c | 18 ++++++++++++++---- parsec/mca/device/device.c | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 9fd5f9b60..585e2ae11 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -230,8 +230,10 @@ int parsec_cuda_migrate_fini() else printf("Migration : delegated \n"); - if(parsec_cuda_iterative) - printf("Iterative task mapping : yes \n"); + if(parsec_cuda_iterative == 1) + printf("Iterative task mapping : mig task mapped \n"); + else if(parsec_cuda_iterative == 2) + printf("Iterative task mapping : all tasks mapped \n"); else printf("Iterative task mapping : no \n"); diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 268e0d68c..4ce3bba44 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -3111,11 +3111,21 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task = (parsec_gpu_task_t*)parsec_fifo_try_pop( &(gpu_device->pending) ); if( NULL != gpu_task ) { - /** - * if the task has been migrated, we have to update the mapping. - */ - if(parsec_cuda_iterative && (gpu_task->migrate_status > TASK_NOT_MIGRATED)) + if((parsec_cuda_iterative == 1) && (gpu_task->migrate_status > TASK_NOT_MIGRATED) + && (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL) ) + { + /** + * Only map tasks that are migrated. + */ update_task_to_device_mapping(gpu_task->ec, gpu_device->super.device_index); + } + else if( (parsec_cuda_iterative == 2) && (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_KERNEL)) + { + /** + * Only map all compute tasks. 
+ */ + update_task_to_device_mapping(gpu_task->ec, gpu_device->super.device_index); + } pop_null = 0; gpu_task->last_data_check_epoch = gpu_device->data_avail_epoch - 1; /* force at least one tour */ diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index 22e128064..7903fcfd0 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -86,7 +86,7 @@ int parsec_get_best_device( parsec_task_t* this_task, double ratio ) int i, dev_index = -1, data_index, prefer_index = -1; parsec_taskpool_t* tp = this_task->taskpool; - if(parsec_cuda_iterative) + if(parsec_cuda_iterative > 0) { // if task to device mapping is already available use that dev_index = find_task_to_device_mapping(this_task); From abb30fdb564a1c441ff6c9ae30065e1190f51317 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Tue, 3 Jan 2023 17:40:32 -0500 Subject: [PATCH 207/215] 1. Check if zone is allocated before it is freed 2. Add zone_free data to gpu_mem_lru when migrating --- parsec/mca/device/cuda/device_cuda_migrate.c | 89 ++++++++++++++++++-- parsec/mca/device/cuda/device_cuda_migrate.h | 5 +- parsec/mca/device/cuda/device_cuda_module.c | 56 +++++++++--- parsec/mca/device/device_gpu.h | 3 +- parsec/mca/device/transfer_gpu.c | 5 +- parsec/utils/zone_malloc.c | 21 +++++ parsec/utils/zone_malloc.h | 5 ++ 7 files changed, 160 insertions(+), 24 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 585e2ae11..63d374047 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1,6 +1,7 @@ #include "parsec/mca/device/cuda/device_cuda_migrate.h" #include "parsec/include/parsec/os-spec-timing.h" #include "parsec/class/list.h" +#include "parsec/execution_stream.h" extern int parsec_device_cuda_enabled; extern int parsec_migrate_statistics; @@ -41,7 +42,18 @@ static void gpu_dev_profiling_init() { parsec_profiling_add_dictionary_keyword("GPU_TASK_COUNT", 
"fill:#FF0000", sizeof(gpu_dev_prof_t), - "first_queue_time{double};select_time{double};second_queue_time{double};exec_time_start{double};exec_time_end{double};first_stage_in_time_start{double};sec_stage_in_time_start{double};first_stage_in_time_end{double};sec_stage_in_time_end{double};stage_out_time_start{double};stage_out_time_end{double};complete_time{double};device_index{double};task_count{double};first_waiting_tasks{double};sec_waiting_tasks{double};mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};task_type{double};class_id{double};exec_stream_index{double}", + "first_queue_time{double};select_time{double};second_queue_time{double};" + "exec_time_start{double};exec_time_end{double};" + "first_stage_in_time_start{double};sec_stage_in_time_start{double};" + "first_stage_in_time_end{double};sec_stage_in_time_end{double};" + "stage_out_time_start{double};stage_out_time_end{double};" + "complete_time_start{double};complete_time_end{double};" + "device_index{double};task_count{double};" + "first_waiting_tasks{double};sec_waiting_tasks{double};" + "mig_status{double};nb_first_stage_in{double};nb_sec_stage_in{double};" + "nb_first_stage_in_d2d{double};nb_first_stage_in_h2d{double};" + "nb_sec_stage_in_d2d{double};nb_sec_stage_in_h2d{double};clock_speed{double};" + "task_type{double};class_id{double};exec_stream_index{double}", &parsec_gpu_task_count_start, &parsec_gpu_task_count_end); } @@ -83,7 +95,8 @@ int parsec_cuda_migrate_init(int ndevices) device_info[i].nb_stage_in_req = 0; device_info[i].completed_co_manager = 0; device_info[i].completed_manager = 0; - device_info[i].iterative_mapped = 0; + device_info[i].iterative_mapped = 0; + device_info[i].thrashing = 0; } task_mapping_ht = PARSEC_OBJ_NEW(parsec_hash_table_t); @@ -111,7 +124,7 @@ int parsec_cuda_migrate_fini() float summary_avg_task_migrated = 0, 
summary_deal_success_perc = 0, summary_avg_task_migrated_per_sucess = 0; int summary_total_evictions = 0, summary_total_stage_in = 0, summary_total_stage_in_req = 0; int summary_completed_manager = 0, summary_completed_co_manager = 0; - int summary_iterative_mapped = 0; + int summary_iterative_mapped = 0, summary_thrashing = 0; #if defined(PARSEC_PROF_TRACE) nvmlShutdown(); @@ -140,6 +153,7 @@ int parsec_cuda_migrate_fini() summary_completed_manager += device_info[i].completed_manager; summary_completed_co_manager += device_info[i].completed_co_manager; summary_iterative_mapped += device_info[i].iterative_mapped; + summary_thrashing += device_info[i].thrashing; printf("\n *********** DEVICE %d *********** \n", i); printf("Total tasks executed : %d \n", device_info[i].total_tasks_executed); @@ -163,6 +177,7 @@ int parsec_cuda_migrate_fini() printf("Avg task migrated per successfull deal : %lf \n", avg_task_migrated_per_sucess); printf("Perc of successfull deals : %lf \n", deal_success_perc); printf("Evictions : %d \n", device_info[i].evictions); + printf("Thrashing : %d \n", device_info[i].thrashing); printf("Stage in initiated : %d \n", device_info[i].nb_stage_in); printf("Stage in required : %d \n", device_info[i].nb_stage_in_req); printf("Perc eviction for stage in initiated : %lf \n", (( (float)device_info[i].evictions / device_info[i].nb_stage_in) * 100 ) ); @@ -170,7 +185,6 @@ int parsec_cuda_migrate_fini() printf("Tasks completed by manager : %d \n", device_info[i].completed_manager); printf("Tasks completed by co-manager : %d \n", device_info[i].completed_co_manager); printf("Tasks mapped iteratively : %d \n", device_info[i].iterative_mapped); - } printf("\n *********** SUMMARY *********** \n"); @@ -195,6 +209,7 @@ int parsec_cuda_migrate_fini() printf("perc of successfull deals : %lf \n", summary_deal_success_perc); printf("Total evictions : %d \n", summary_total_evictions); + printf("Total Thrashing : %d \n", summary_thrashing); printf("Total stage in 
initiated : %d \n", summary_total_stage_in); printf("Total stage in required : %d \n", summary_total_stage_in_req); printf("Perc eviction for stage in initiated : %lf \n", (((float)summary_total_evictions / summary_total_stage_in) * 100 ) ); @@ -1067,10 +1082,15 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); - zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); + if( zone_is_allocated(gpu_device->memory, (void *)(task->data[i].data_out->device_private)) ) + { + zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); + } task->data[i].data_out->device_private = NULL; PARSEC_OBJ_RELEASE(task->data[i].data_out); @@ -1247,6 +1267,11 @@ int inc_iterative_mapped_count(int device_index) return device_info[device_index].iterative_mapped; } +int inc_thrashing_count(int device_index) +{ + parsec_atomic_fetch_inc_int32(&device_info[device_index].thrashing); + return device_info[device_index].thrashing; +} int find_task_affinity_to_starving_node(parsec_gpu_task_t *gpu_task, int device_index, int status) @@ -1341,15 +1366,13 @@ int find_task_to_task_affinity(parsec_gpu_task_t *first_gpu_task, parsec_gpu_tas } parsec_hook_return_t -parsec_cuda_co_manager( parsec_execution_stream_t *es, - parsec_device_gpu_module_t* gpu_device ) +parsec_cuda_co_manager( parsec_execution_stream_t *es, parsec_device_gpu_module_t* gpu_device ) { int rc = 0, nb_migrated = 0, i = 0; parsec_task_t* task = NULL; parsec_gpu_task_t *gpu_task = NULL; parsec_list_t 
*gpu_tasks_to_free = NULL; - gpu_tasks_to_free = PARSEC_OBJ_NEW(parsec_list_t); (void)es; @@ -1362,6 +1385,8 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, return PARSEC_HOOK_RETURN_ASYNC; } + gpu_tasks_to_free = PARSEC_OBJ_NEW(parsec_list_t); + /** * @brief The migrate_manager thread exits when there are no more * work to be done. @@ -1390,14 +1415,62 @@ parsec_cuda_co_manager( parsec_execution_stream_t *es, if( gpu_task != NULL) { task = gpu_task->ec; + + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_start = time_stamp(); + #endif + __parsec_complete_execution( es, task ); + + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_end = time_stamp(); + #endif + + #if defined(PARSEC_PROF_TRACE) + gpu_dev_prof_t prof_info; + prof_info.task_type = gpu_task->task_type; + prof_info.device_index = gpu_device->super.device_index; + prof_info.task_count = gpu_device->mutex; + prof_info.first_queue_time = gpu_task->first_queue_time; + prof_info.select_time = gpu_task->select_time; + prof_info.second_queue_time = gpu_task->second_queue_time; + prof_info.exec_time_start = gpu_task->exec_time_start; + prof_info.exec_time_end = gpu_task->exec_time_end; + prof_info.complete_time_start = gpu_task->complete_time_start; + prof_info.complete_time_end = gpu_task->complete_time_end; + prof_info.first_stage_in_time_start = gpu_task->first_stage_in_time_start; + prof_info.sec_stage_in_time_start = gpu_task->sec_stage_in_time_start; + prof_info.first_stage_in_time_end = gpu_task->first_stage_in_time_end; + prof_info.sec_stage_in_time_end = gpu_task->sec_stage_in_time_end; + prof_info.stage_out_time_start = gpu_task->stage_out_time_start; + prof_info.stage_out_time_end = gpu_task->stage_out_time_end; + prof_info.first_waiting_tasks = gpu_task->first_waiting_tasks; + prof_info.sec_waiting_tasks = gpu_task->sec_waiting_tasks; + prof_info.mig_status = gpu_task->migrate_status; + prof_info.nb_first_stage_in = gpu_task->nb_first_stage_in; + prof_info.nb_sec_stage_in = 
gpu_task->nb_sec_stage_in; + prof_info.nb_first_stage_in_d2d = gpu_task->nb_first_stage_in_d2d; + prof_info.nb_first_stage_in_h2d = gpu_task->nb_first_stage_in_h2d; + prof_info.nb_sec_stage_in_d2d = gpu_task->nb_sec_stage_in_d2d; + prof_info.nb_sec_stage_in_h2d = gpu_task->nb_sec_stage_in_h2d; + prof_info.clock_speed = gpu_task->clock_speed; + prof_info.class_id = gpu_task->ec->task_class->task_class_id; + prof_info.exec_stream_index = gpu_task->exec_stream_index; + parsec_profiling_trace_flags(es->es_profile, + parsec_gpu_task_count_end, + (uint64_t)gpu_task->ec->task_class->key_functions->key_hash(gpu_task->ec->task_class->make_key(gpu_task->ec->taskpool, gpu_task->ec->locals), NULL), + gpu_task->ec->taskpool->taskpool_id, &prof_info, 0); + #endif + parsec_atomic_fetch_dec_int32( &(gpu_device->complete_mutex) ); int f = 0; for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) { if( gpu_task->original_data_in[f] != NULL ) + { PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } } parsec_list_push_back(gpu_tasks_to_free, (parsec_list_item_t*)gpu_task); diff --git a/parsec/mca/device/cuda/device_cuda_migrate.h b/parsec/mca/device/cuda/device_cuda_migrate.h index dbba942ac..0de39bb19 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.h +++ b/parsec/mca/device/cuda/device_cuda_migrate.h @@ -57,6 +57,7 @@ typedef struct parsec_device_cuda_info_s int completed_manager; int completed_co_manager; int iterative_mapped; + int thrashing; } parsec_device_cuda_info_t; typedef struct migrated_task_s @@ -87,7 +88,8 @@ typedef struct gpu_dev_prof_s double sec_stage_in_time_end; double stage_out_time_start; double stage_out_time_end; - double complete_time; + double complete_time_start; + double complete_time_end; double device_index; double task_count; double first_waiting_tasks; @@ -151,5 +153,6 @@ parsec_hook_return_t parsec_cuda_co_manager( parsec_execution_stream_t *es, int inc_manager_complete_count(int device_index); int inc_co_manager_complete_count(int 
device_index); int inc_iterative_mapped_count(int device_index); +int inc_thrashing_count(int device_index); #endif diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 4ce3bba44..edf7ca172 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -841,7 +841,10 @@ static void parsec_cuda_memory_release_list(parsec_device_cuda_module_t* cuda_de #endif PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p [dev_prvt %p] attached to original %p on device_index %d at %s:%d", gpu_copy, gpu_copy->device_private, gpu_copy->original, gpu_copy->device_index ,__FILE__, __LINE__); - zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); + if( zone_is_allocated( cuda_device->super.memory, (void*)gpu_copy->device_private ) ) + { + zone_free( cuda_device->super.memory, (void*)gpu_copy->device_private ); + } #endif gpu_copy->device_private = NULL; @@ -1207,7 +1210,10 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, assert( 0 != (lru_gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "Freed copy %p attached to original %p on device_index %d at %s:%d", lru_gpu_elem, lru_gpu_elem->original, lru_gpu_elem->device_index, __FILE__, __LINE__); - zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); + if( zone_is_allocated( gpu_device->memory, (void*)(lru_gpu_elem->device_private)) ) + { + zone_free( gpu_device->memory, (void*)(lru_gpu_elem->device_private) ); + } lru_gpu_elem->device_private = NULL; data_avail_epoch++; PARSEC_DEBUG_VERBOSE(3, parsec_gpu_output_stream, @@ -1406,12 +1412,15 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { if( !((1 == gpu_elem->readers) && (PARSEC_FLOW_ACCESS_READ & type)) ) { - parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " + 
if(gpu_task->migrate_status == TASK_NOT_MIGRATED) + { + parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " "(possible anti-dependency,\n" "or concurrent accesses), please prevent that with CTL dependencies\n", gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); - parsec_atomic_unlock( &original->lock ); - return -1; + parsec_atomic_unlock( &original->lock ); + return -1; + } } } PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, @@ -2931,7 +2940,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, gpu_task->second_queue_time = 0; gpu_task->exec_time_start = 0; gpu_task->exec_time_end = 0; - gpu_task->complete_time = 0; + gpu_task->complete_time_start = 0; + gpu_task->complete_time_end = 0; gpu_task->first_stage_in_time_start = 0; gpu_task->sec_stage_in_time_start = 0; gpu_task->first_stage_in_time_end = 0; @@ -3157,6 +3167,10 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, PARSEC_LIST_ITEM_SINGLETON(gpu_task); if (gpu_task->task_type == PARSEC_GPU_TASK_TYPE_D2HTRANSFER) { parsec_gpu_complete_w2r_task(gpu_device, gpu_task, es); + + if (parsec_migrate_statistics) + inc_thrashing_count(CUDA_DEVICE_NUM(gpu_device->super.device_index)); + gpu_task = progress_task; goto fetch_task_from_shared_queue; } @@ -3173,8 +3187,17 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, /** The manager will complete the tasks */ if( parsec_cuda_delegate_task_completion == 0 ) { + + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_start = time_stamp(); + #endif + __parsec_complete_execution( es, gpu_task->ec ); manager_completing_task = 1; + + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_end = time_stamp(); + #endif /** * @brief For tasks migrated after stage_ in, during the first stage_in @@ -3188,7 +3211,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) { if( 
gpu_task->original_data_in[f] != NULL ) + { PARSEC_OBJ_RELEASE( gpu_task->original_data_in[f] ); + } } if(parsec_migrate_statistics) @@ -3207,9 +3232,18 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, /** If the co-manager is not yet ready */ else { + + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_start = time_stamp(); + #endif + __parsec_complete_execution( es, gpu_task->ec ); manager_completing_task = 1; + #if defined(PARSEC_PROF_TRACE) + gpu_task->complete_time_end = time_stamp(); + #endif + int f = 0; for( f = 0; f < gpu_task->ec->task_class->nb_flows; f++) { @@ -3221,14 +3255,9 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, inc_manager_complete_count( CUDA_DEVICE_NUM(gpu_device->super.device_index) ); } -#if defined(PARSEC_PROF_TRACE) - if(gpu_task != NULL) - gpu_task->complete_time = time_stamp(); -#endif - #if defined(PARSEC_PROF_TRACE) - if( gpu_task != NULL ) + if( gpu_task != NULL && manager_completing_task) { gpu_dev_prof_t prof_info; @@ -3240,7 +3269,8 @@ parsec_cuda_kernel_scheduler( parsec_execution_stream_t *es, prof_info.second_queue_time = gpu_task->second_queue_time; prof_info.exec_time_start = gpu_task->exec_time_start; prof_info.exec_time_end = gpu_task->exec_time_end; - prof_info.complete_time = gpu_task->complete_time; + prof_info.complete_time_start = gpu_task->complete_time_start; + prof_info.complete_time_end = gpu_task->complete_time_end; prof_info.first_stage_in_time_start = gpu_task->first_stage_in_time_start; prof_info.sec_stage_in_time_start = gpu_task->sec_stage_in_time_start; prof_info.first_stage_in_time_end = gpu_task->first_stage_in_time_end; diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index ce605c965..4f6266c68 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -105,7 +105,8 @@ struct parsec_gpu_task_s { double exec_time_end; double stage_out_time_start; double stage_out_time_end; - double complete_time; + double 
complete_time_start; + double complete_time_end; int32_t first_waiting_tasks; int32_t sec_waiting_tasks; int32_t nb_first_stage_in; diff --git a/parsec/mca/device/transfer_gpu.c b/parsec/mca/device/transfer_gpu.c index 925b23269..7c46ea5ce 100644 --- a/parsec/mca/device/transfer_gpu.c +++ b/parsec/mca/device/transfer_gpu.c @@ -274,6 +274,10 @@ parsec_gpu_create_w2r_task(parsec_device_gpu_module_t *gpu_device, w2r_task->stage_out = NULL; w2r_task->complete_stage = NULL; +#if defined(PARSEC_PROF_TRACE) + w2r_task->prof_key_end = -1; +#endif + (void)es; return w2r_task; } @@ -309,7 +313,6 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "D2H[%s] task %p:%i GPU data copy %p [%p] has a backup in memory", gpu_device->super.name, (void*)task, i, gpu_copy, gpu_copy->original); - printf("parsec_gpu_complete_w2r_task \n"); } else { gpu_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; cpu_copy->coherency_state = PARSEC_DATA_COHERENCY_SHARED; diff --git a/parsec/utils/zone_malloc.c b/parsec/utils/zone_malloc.c index 595b35265..a028c6dc0 100644 --- a/parsec/utils/zone_malloc.c +++ b/parsec/utils/zone_malloc.c @@ -120,6 +120,27 @@ void *zone_malloc(zone_malloc_t *gdata, size_t size) return NULL; } +int zone_is_allocated(zone_malloc_t *gdata, void *add) +{ + segment_t *current_segment, *next_segment, *prev_segment; + int current_tid, next_tid, prev_tid; + off_t offset; + + offset = (char*)add -gdata->base; + assert( (offset % gdata->unit_size) == 0); + current_tid = offset / gdata->unit_size; + current_segment = SEGMENT_AT_TID(gdata, current_tid); + + if( NULL == current_segment ) + { + return 0 ; + } + else + { + return 1; + } +} + void zone_free(zone_malloc_t *gdata, void *add) { segment_t *current_segment, *next_segment, *prev_segment; diff --git a/parsec/utils/zone_malloc.h b/parsec/utils/zone_malloc.h index a775c6372..5a2574052 100644 --- a/parsec/utils/zone_malloc.h +++ 
b/parsec/utils/zone_malloc.h @@ -68,6 +68,11 @@ size_t zone_in_use(zone_malloc_t *gdata); */ size_t zone_debug(zone_malloc_t *gdata, int level, int output_id, const char *prefix); +/** + * Check if a segment was allocated + */ +int zone_is_allocated(zone_malloc_t *gdata, void *add); + END_C_DECLS #endif /* _ZONE_MALLOC_H_ */ From c2ab0264579f21cf6c8a47032660432f114d00c3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 4 Jan 2023 00:13:01 -0500 Subject: [PATCH 208/215] single_try updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 63d374047..29d601392 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -537,7 +537,6 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d { do { - *tries += 1; task = (parsec_gpu_task_t *)parsec_list_nolock_pop_back(list); if (task != NULL) { @@ -547,6 +546,7 @@ int find_compute_tasks(parsec_list_t *list, parsec_device_gpu_module_t *dealer_d PARSEC_LIST_ITEM_SINGLETON((parsec_list_item_t *)task); parsec_list_nolock_push_back(ring, (parsec_list_item_t *)task); *deal_success += 1; + *tries += 1; } else { From c9350f6c1ca0ac466303b7e308157390123eb869 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 8 Jan 2023 06:02:55 -0500 Subject: [PATCH 209/215] 1. Memory management during migration of stage-intask refined. 2. Documentation updated. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 84 +++++++++++++------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 29d601392..8e18da6f0 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1062,11 +1062,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t *original = task->data[i].data_out->original; parsec_atomic_lock(&original->lock); - /** - * If the flow is write only, we free the data immediatly as this data should never - * be written back. As the data_in of a write only flow is always CPU copy we revert - * to the original stage_in mechanism for write only flows. - */ + /** Write only access to data_out*/ if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { @@ -1075,34 +1071,28 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(original->device_copies[0] != NULL); assert(task->data[i].data_in == original->device_copies[0]); - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] freed from device %d)", - task->data[i].data_out, original, task->data[i].data_out->readers, - task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index); - + /** + * @brief When the data is write access, is not in any tracking lists. + * As this we won't need this data (as the data is write only and only + * this task is supposed to be write to this data) we can add it to + * the gpu_mem_lru. As this data has no reader this ensure that the data + * is reused during zone_malloc(). 
+ */ parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - - parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)parsec_mca_device_get(task->data[i].data_out->device_index); - parsec_data_copy_detach(original, task->data[i].data_out, gpu_device->super.device_index); - - if( zone_is_allocated(gpu_device->memory, (void *)(task->data[i].data_out->device_private)) ) - { - zone_free(gpu_device->memory, (void *)(task->data[i].data_out->device_private)); - } - task->data[i].data_out->device_private = NULL; - PARSEC_OBJ_RELEASE(task->data[i].data_out); /** The data in used in the first stage-in may have been released. * But we store the original data_in and that can bes used for the stage_in. * As the flow is write-only we dont care about the version of the data. - */ + */ assert(gpu_task->original_data_in[i] != NULL); task->data[i].data_in = gpu_task->original_data_in[i]; } - else + /** Read only access to data_out*/ + else if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); /** @@ -1112,17 +1102,55 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t */ gpu_task->candidate[i] = task->data[i].data_out; + /** + * As the data has atleast one reader, one of which is this task being + * migrated, this data will not be evicted. So this is atleast in the + * list gpu_mem_owned_lru. If some other task is writing to it (which + * should not happen) this will not be in any tracking list, but the tasks + * that is writing to it will eventually add it to the appropriate tracking + * list. 
+ */ + #if 0 parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + #endif + } + /** Read and write access to data_out*/ + else if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + assert(task->data[i].data_out->readers == 1); + /** + * we set a possible candidate for this flow of the task. This will allow + * us to easily find the stage_in data as the possible candidate in + * parsec_gpu_data_stage_in() function. + */ + gpu_task->candidate[i] = task->data[i].data_out; - PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, - "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", - task->data[i].data_out, original, task->data[i].data_out->readers, - task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, - starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + /** + * @brief As the task has write access, it is not in any tracking lists. + * As the data has atleast one reader, one of which is this task being + * migrated, this data will not be evicted. So we add it to gpu_mem_lru. + * As this task has write access, no ther task has write access to it, + * and so we are not in danger of prematurely evicting the data because + * we added it to gpu_mem_lru. 
+ */ + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); + PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + } + else + { + assert(0); } + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, + "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", + task->data[i].data_out, original, task->data[i].data_out->readers, + task->data[i].data_out->super.super.obj_reference_count, dealer_device->super.device_index, + starving_device->super.device_index, TASK_MIGRATED_AFTER_STAGE_IN); + parsec_atomic_unlock(&original->lock); } } From d7596e8348ecbae722e776e7bc7d3f91100425b3 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 8 Jan 2023 06:46:17 -0500 Subject: [PATCH 210/215] Make sure the data_in of the staged-in task is not NULL. --- parsec/mca/device/cuda/device_cuda_migrate.c | 87 ++++++++++++-------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 8e18da6f0..0ab216c0e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1061,38 +1061,10 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t *original = task->data[i].data_out->original; parsec_atomic_lock(&original->lock); - - /** Write only access to data_out*/ - if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) - { - assert(task->data[i].data_out->readers == 0); - assert(task->data[i].data_out->super.super.obj_reference_count == 1); - assert(original->device_copies[0] != NULL); - assert(task->data[i].data_in == original->device_copies[0]); - /** - * @brief When the data is write access, is not in any tracking lists. 
- * As this we won't need this data (as the data is write only and only - * this task is supposed to be write to this data) we can add it to - * the gpu_mem_lru. As this data has no reader this ensure that the data - * is reused during zone_malloc(). - */ - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - - /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can bes used for the stage_in. - * As the flow is write-only we dont care about the version of the data. - */ - assert(gpu_task->original_data_in[i] != NULL); - task->data[i].data_in = gpu_task->original_data_in[i]; - - } /** Read only access to data_out*/ - else if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); /** @@ -1110,15 +1082,52 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * that is writing to it will eventually add it to the appropriate tracking * list. */ - #if 0 + + + /** The data in used in the first stage-in may have been released. + * But we store the original data_in and that can bes used for the stage_in. + * As the flow is write-only we dont care about the version of the data. 
+ */ + assert(gpu_task->original_data_in[i] != NULL); + if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) + { + task->data[i].data_in = gpu_task->original_data_in[i]; + } + + } + + /** Write only access to data_out */ + else if (!(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + { + assert(task->data[i].data_out->readers == 0); + assert(task->data[i].data_out->super.super.obj_reference_count == 1); + + /** + * @brief When the data is write access, is not in any tracking lists. + * As we won't need this data (as the data is write only and only + * this task is supposed to be write to this data) we can add it to + * the gpu_mem_lru. As this data has no reader this ensure that the data + * is reused during zone_malloc(). + */ parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - #endif + parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + + /** The data in used in the first stage-in may have been released. + * But we store the original data_in and that can bes used for the stage_in. + * As the flow is write-only we dont care about the version of the data. 
+ */ + assert(gpu_task->original_data_in[i] != NULL); + if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) + { + task->data[i].data_in = gpu_task->original_data_in[i]; + } } + /** Read and write access to data_out*/ else if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && - (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) + (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers == 1); /** @@ -1139,6 +1148,16 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + + /** The data in used in the first stage-in may have been released. + * But we store the original data_in and that can bes used for the stage_in. + * As the flow is write-only we dont care about the version of the data. + */ + assert(gpu_task->original_data_in[i] != NULL); + if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) + { + task->data[i].data_in = gpu_task->original_data_in[i]; + } } else { From 21296fcd1a1a9ceef8137ced67f545f5ff3d2978 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sun, 8 Jan 2023 19:57:06 -0500 Subject: [PATCH 211/215] For stage-in task we set possible candidate for write only data as well. 
--- parsec/mca/device/cuda/device_cuda_migrate.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 0ab216c0e..ffc58e07e 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1105,11 +1105,19 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t /** * @brief When the data is write access, is not in any tracking lists. - * As we won't need this data (as the data is write only and only + * As we won't need this data (as the data is write only and only * this task is supposed to be write to this data) we can add it to - * the gpu_mem_lru. As this data has no reader this ensure that the data - * is reused during zone_malloc(). + * the gpu_mem_lru. + * + * But we decided that as D2D trasfers are more fast, compared to H2D transfers, + * we can use this data as the candidate fro the next stage-in. + * As this data has no reader this may endup being reused during zone_malloc(). + * So we increase the reader for this data. This reader will get decremented + * after the second arge in. */ + gpu_task->candidate[i] = task->data[i].data_out; + PARSEC_DATA_COPY_INC_READERS_ATOMIC(task->data[i].data_out); + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); From e220c4372db726d59d7959bc84610247535f5d8f Mon Sep 17 00:00:00 2001 From: Joseph John Date: Mon, 9 Jan 2023 00:14:52 -0500 Subject: [PATCH 212/215] 1. Conditions simplified in change_task_features() 2. Code cleanup 3. 
documenattion updated --- parsec/mca/device/cuda/device_cuda_migrate.c | 36 +++++++++----------- parsec/mca/device/cuda/device_cuda_module.c | 7 ---- parsec/mca/device/device_gpu.h | 1 - 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index ffc58e07e..de4bd2db8 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1054,9 +1054,7 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t for (i = 0; i < task->task_class->nb_flows; i++) { - if (task->data[i].data_out == NULL) - continue; - if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & gpu_task->flow[i]->flow_flags)) // CTL flow + if ( !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) ) // CTL flow continue; parsec_data_t *original = task->data[i].data_out->original; @@ -1080,13 +1078,12 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * list gpu_mem_owned_lru. If some other task is writing to it (which * should not happen) this will not be in any tracking list, but the tasks * that is writing to it will eventually add it to the appropriate tracking - * list. + * list during parsec_cuda_kernel_epilog. */ - /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can bes used for the stage_in. - * As the flow is write-only we dont care about the version of the data. + /** The data in used in the first stage-in may have been evicted. + * But we store the original data_in and that can be used for the stage_in. 
*/ assert(gpu_task->original_data_in[i] != NULL); if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) @@ -1106,24 +1103,24 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t /** * @brief When the data is write access, is not in any tracking lists. * As we won't need this data (as the data is write only and only - * this task is supposed to be write to this data) we can add it to - * the gpu_mem_lru. + * this task is supposed to be write to this data) and it doesnt have any other readers + * we can add it to the gpu_mem_lru for eviction. * - * But we decided that as D2D trasfers are more fast, compared to H2D transfers, - * we can use this data as the candidate fro the next stage-in. - * As this data has no reader this may endup being reused during zone_malloc(). + * But we decided that as D2D trasfers are more fast, compared to H2D transfers. + * So we can use this data as the candidate for the next stage-in. + * As this data has no reader this may endup being evicted during zone_malloc(). * So we increase the reader for this data. This reader will get decremented - * after the second arge in. + * after the second stage-in. */ gpu_task->candidate[i] = task->data[i].data_out; PARSEC_DATA_COPY_INC_READERS_ATOMIC(task->data[i].data_out); - + parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can bes used for the stage_in. + * But we store the original data_in and that can be used for the stage_in. * As the flow is write-only we dont care about the version of the data. 
*/ assert(gpu_task->original_data_in[i] != NULL); @@ -1149,17 +1146,16 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * @brief As the task has write access, it is not in any tracking lists. * As the data has atleast one reader, one of which is this task being * migrated, this data will not be evicted. So we add it to gpu_mem_lru. - * As this task has write access, no ther task has write access to it, - * and so we are not in danger of prematurely evicting the data because - * we added it to gpu_mem_lru. + * As this task has write access, no ther task has write access to it. + * So when we add it to gpu_mem_lru, we are not in danger of prematurely + * evicting the data, to whichn some other task has read or write access. */ parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can bes used for the stage_in. - * As the flow is write-only we dont care about the version of the data. + * But we store the original data_in and that can be used for the stage_in. 
*/ assert(gpu_task->original_data_in[i] != NULL); if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index edf7ca172..f6a818d09 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -494,7 +494,6 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) /* Initialize internal lists */ PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); - PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_tmp_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->to_complete, parsec_fifo_t); @@ -646,7 +645,6 @@ parsec_cuda_module_fini(parsec_device_module_t* device) /* Cleanup the GPU memory. */ PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); - PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_tmp_lru); return PARSEC_SUCCESS; } @@ -882,11 +880,6 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); - if( !parsec_list_is_empty(&gpu_device->gpu_mem_tmp_lru) ) - { - printf("LIST is not empty \n"); - } - #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) && !defined(_NDEBUG) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 4f6266c68..27501fbc3 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -157,7 +157,6 @@ struct parsec_device_gpu_module_s { */ parsec_list_t gpu_mem_lru; /* Read-only blocks, and fresh blocks */ parsec_list_t gpu_mem_owned_lru; /* Dirty blocks */ - parsec_list_t gpu_mem_tmp_lru; /* Dirty blocks */ parsec_fifo_t pending; parsec_fifo_t 
to_complete; struct zone_malloc_s *memory; From a8a1df8ae9ff69080ffd4309017e94a636137716 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 25 Jan 2023 03:23:00 -0500 Subject: [PATCH 213/215] Signed-off-by: Joseph John Memory leak problem addressed by using a temporary list. --- parsec/mca/device/cuda/device_cuda_migrate.c | 6 +++ parsec/mca/device/cuda/device_cuda_module.c | 48 +++++++++++++++----- parsec/mca/device/device_gpu.h | 1 + 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index de4bd2db8..05d98b619 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1168,6 +1168,12 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t assert(0); } + if( task->data[i].data_out->push_task == task ) + { + printf("ERROR 2 \n"); + exit(0); + } + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "Migrate: data %p attached to original %p [readers %d, ref_count %d] migrated from device %d to %d (stage_in: %d)", task->data[i].data_out, original, task->data[i].data_out->readers, diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index f6a818d09..13c859dbc 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -494,6 +494,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) /* Initialize internal lists */ PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_mem_owned_lru, parsec_list_t); + PARSEC_OBJ_CONSTRUCT(&gpu_device->gpu_tmp_lru, parsec_list_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->pending, parsec_fifo_t); PARSEC_OBJ_CONSTRUCT(&gpu_device->to_complete, parsec_fifo_t); @@ -645,6 +646,7 @@ parsec_cuda_module_fini(parsec_device_module_t* device) /* Cleanup the GPU memory. 
*/ PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_lru); PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_mem_owned_lru); + PARSEC_OBJ_DESTRUCT(&gpu_device->gpu_tmp_lru); return PARSEC_SUCCESS; } @@ -879,13 +881,19 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) /* Free all memory on GPU */ parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); + parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_tmp_lru); + //if(parsec_list_is_empty( &gpu_device->gpu_tmp_lru )) + //{ + // printf("ERROR \n"); + // exit( 0 ); + //} #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) && !defined(_NDEBUG) if( (in_use = zone_in_use(gpu_device->memory)) != 0 ) { parsec_warning("GPU[%s] memory leak detected: %lu bytes still allocated on GPU", device->name, in_use); - //assert(0); + assert(0); } #endif return PARSEC_SUCCESS; @@ -1063,7 +1071,12 @@ parsec_gpu_data_reserve_device_space( parsec_device_cuda_module_t* cuda_device, "GPU[%s]:%s: Drop LRU-retrieved CUDA copy %p [readers %d, ref_count %d] original %p", gpu_device->super.name, task_name, lru_gpu_elem, lru_gpu_elem->readers, lru_gpu_elem->super.super.obj_reference_count, lru_gpu_elem->original); - + + + parsec_list_item_ring_chop((parsec_list_item_t*)lru_gpu_elem); + PARSEC_LIST_ITEM_SINGLETON(lru_gpu_elem); + parsec_list_push_front(&gpu_device->gpu_tmp_lru, (parsec_list_item_t*)lru_gpu_elem); + goto find_another_data; // TODO: add an assert of some sort to check for leaks here? 
} /* It's also possible that the ref_count of that element is bigger than 1 @@ -1379,6 +1392,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, parsec_device_gpu_module_t *gpu_device = &cuda_device->super; int32_t type = flow->flow_flags; parsec_data_copy_t* in_elem = task_data->data_in; + parsec_data_copy_t* release_after_data_in_is_attached = NULL; parsec_data_t* original = in_elem->original; parsec_gpu_data_copy_t* gpu_elem = task_data->data_out; parsec_data_copy_t *candidate = NULL; @@ -1407,10 +1421,10 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, { if(gpu_task->migrate_status == TASK_NOT_MIGRATED) { - parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " - "(possible anti-dependency,\n" - "or concurrent accesses), please prevent that with CTL dependencies\n", - gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); + //parsec_warning("GPU[%s]:\tWrite access to data copy %p [ref_count %d] with existing readers [%d] " + // "(possible anti-dependency,\n" + // "or concurrent accesses), please prevent that with CTL dependencies\n", + // gpu_device->super.name, gpu_elem, gpu_elem->super.super.obj_reference_count, gpu_elem->readers); parsec_atomic_unlock( &original->lock ); return -1; } @@ -1493,7 +1507,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, /** Remember the original data_in. 
*/ assert( gpu_task->original_data_in[ flow->flow_index ] == NULL); gpu_task->original_data_in[ flow->flow_index ] = task_data->data_in; - + + assert( gpu_task->migrate_status != TASK_MIGRATED_AFTER_STAGE_IN ); goto src_selected; } } @@ -1537,6 +1552,7 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, in_elem = candidate; in_elem_dev = target; + assert( gpu_task->migrate_status != TASK_MIGRATED_AFTER_STAGE_IN ); goto src_selected; } } @@ -1586,11 +1602,6 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, if( -1 == transfer_from ) { gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - - if( undo_readers_inc_if_no_transfer ) - { - PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); - } } else { @@ -1708,6 +1719,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, nb_elts); } parsec_atomic_unlock( &original->lock ); + if( NULL != release_after_data_in_is_attached ) + PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); assert(0); return -1; } @@ -1733,8 +1746,17 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, } gpu_elem->push_task = gpu_task->ec; /* only the task who does the transfer can modify the data status later. 
*/ parsec_atomic_unlock( &original->lock ); + if( NULL != release_after_data_in_is_attached ) + PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); return 1; } + + if( undo_readers_inc_if_no_transfer ) + { + gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; + PARSEC_DATA_COPY_DEC_READERS_ATOMIC(in_elem); + } + assert( transfer_from == -1 || gpu_elem->data_transfer_status == PARSEC_DATA_STATUS_COMPLETE_TRANSFER ); parsec_data_end_transfer_ownership_to_copy(original, gpu_device->super.device_index, (uint8_t)type); @@ -1747,6 +1769,8 @@ parsec_gpu_data_stage_in( parsec_device_cuda_module_t* cuda_device, gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, nb_elts, in_elem->version, gpu_elem->version); parsec_atomic_unlock( &original->lock ); + if( NULL != release_after_data_in_is_attached ) + PARSEC_OBJ_RELEASE(release_after_data_in_is_attached); return 0; } diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index 27501fbc3..641d85953 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -157,6 +157,7 @@ struct parsec_device_gpu_module_s { */ parsec_list_t gpu_mem_lru; /* Read-only blocks, and fresh blocks */ parsec_list_t gpu_mem_owned_lru; /* Dirty blocks */ + parsec_list_t gpu_tmp_lru; /* Dirty blocks */ parsec_fifo_t pending; parsec_fifo_t to_complete; struct zone_malloc_s *memory; From 4c7cea74f8efadf02f5982d5a0accf47cbebcab9 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Sat, 28 Jan 2023 22:36:41 -0500 Subject: [PATCH 214/215] patch https://github.com/ICLDisco/parsec/pull/479/files implemented --- parsec/interfaces/ptg/ptg-compiler/jdf2c.c | 83 ++++------------------ 1 file changed, 15 insertions(+), 68 deletions(-) diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index 4f333cd9a..d92509af0 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -1538,6 +1538,7 @@ 
static void jdf_generate_structure(jdf_t *jdf) for(f = jdf->functions; f != NULL; f = f->next) { if( 0 == (f->user_defines & JDF_FUNCTION_HAS_UD_MAKE_KEY) ) { for(pl = f->parameters; pl != NULL; pl = pl->next) { + coutput(" int %s_%s_min;\n", f->fname, pl->name); coutput(" int %s_%s_range;\n", f->fname, pl->name); } } else { @@ -3327,30 +3328,8 @@ static void jdf_generate_deps_key_functions(const jdf_t *jdf, const jdf_functio for(vl = f->locals; vl != NULL; vl = vl->next) { if( local_is_parameter(f, vl) != NULL ) { - int have_min = 0; - if( vl->expr->op == JDF_RANGE ) { - coutput(" int %s%s_min = %s;\n", JDF2C_NAMESPACE, vl->name, dump_expr((void**)vl->expr->jdf_ta1, &info)); - have_min = 1; - } else { - if( vl->expr->local_variables != NULL ) { - char *vname; - if( asprintf(&vname, "%s%s_min", JDF2C_NAMESPACE, vl->name) <= 0 ) { - fprintf(stderr, "Cannot allocate internal memory for the PTG compiler\n"); - exit(-1); - } - coutput(" int %s;\n", vname); - jdf_generate_range_min_without_fn(jdf, vl->expr, vname, "(&"JDF2C_NAMESPACE"_assignments)"); - free(vname); - have_min = 1; - } - } - if(have_min) { - coutput(" int %s = (__parsec_key) %% __parsec_tp->%s_%s_range + %s%s_min;\n", - vl->name, f->fname, vl->name, JDF2C_NAMESPACE, vl->name); - } else { - coutput(" int %s = (__parsec_key) %% __parsec_tp->%s_%s_range;\n", - vl->name, f->fname, vl->name); - } + coutput(" int %s = (__parsec_key) %% __parsec_tp->%s_%s_range + __parsec_tp->%s_%s_min;\n", + vl->name, f->fname, vl->name, f->fname, vl->name); string_arena_add_string(sa_format, "%s%%d", first_param?"":", "); string_arena_add_string(sa_params, "%s%s", first_param?"":", ", vl->name); first_param = 0; @@ -3727,9 +3706,11 @@ static void jdf_generate_internal_init(const jdf_t *jdf, const jdf_function_entr vl = l2p_item->vl; if( NULL == (pl = l2p_item->pl) ) continue; if(vl->expr->op == JDF_RANGE || NULL != vl->expr->local_variables) { + coutput(" __parsec_tp->%s_%s_min = %s%s_min;\n", f->fname, pl->name, 
JDF2C_NAMESPACE, pl->name); coutput(" __parsec_tp->%s_%s_range = (%s%s_max - %s%s_min) + 1;\n", f->fname, pl->name, JDF2C_NAMESPACE, pl->name, JDF2C_NAMESPACE, pl->name); } else { + coutput(" __parsec_tp->%s_%s_min = 0;\n", f->fname, pl->name); coutput(" __parsec_tp->%s_%s_range = 1; /* single value, not a range */\n", f->fname, pl->name); } } @@ -4756,68 +4737,34 @@ static void jdf_generate_hashfunction_for(const jdf_t *jdf, const jdf_function_e "{\n", jdf_property_get_string(f->properties, JDF_PROP_UD_MAKE_KEY_FN_NAME, NULL)); if( f->parameters == NULL ) { - coutput(" return (parsec_key_t)0;\n" + coutput(" (void)tp; (void)assignment;\n" + " return (parsec_key_t)0;\n" "}\n"); } else { - coutput( " const __parsec_%s_internal_taskpool_t *__parsec_tp = (const __parsec_%s_internal_taskpool_t *)tp;\n" - " %s ascopy, *assignment = &ascopy;\n" - " uintptr_t __parsec_id = 0;\n" - " memcpy(assignment, as, sizeof(%s));\n", + coutput(" const __parsec_%s_internal_taskpool_t *__parsec_tp = (__parsec_%s_internal_taskpool_t *)tp;\n" + " const %s *assignment = (%s*)as;\n" + " uint64_t __parsec_id = 0;\n", jdf_basename, jdf_basename, parsec_get_name(jdf, f, "parsec_assignment_t"), parsec_get_name(jdf, f, "parsec_assignment_t")); - - info.prefix = ""; - info.suffix = ""; - info.sa = sa_range_multiplier; - info.assignments = "assignment"; + string_arena_init(sa_range_multiplier); - idx = 0; for(vl = f->locals; vl != NULL; vl = vl->next) { - string_arena_init(sa_range_multiplier); - - coutput(" const int %s = assignment->%s.value;\n", - vl->name, vl->name); if( local_is_parameter(f, vl) != NULL ) { - if( vl->expr->op == JDF_RANGE ) { - coutput(" int %s%s_min = %s;\n", JDF2C_NAMESPACE, vl->name, dump_expr((void**)vl->expr->jdf_ta1, &info)); - } else { - if( vl->expr->local_variables != NULL ) { - char *vname; - if( asprintf(&vname, "%s%s_min", JDF2C_NAMESPACE, vl->name) <= 0 ) { - fprintf(stderr, "Cannot allocate internal memory for the PTG compiler\n"); - exit(-1); - } - coutput(" 
int %s;\n", vname); - jdf_generate_range_min_without_fn(jdf, vl->expr, vname, "assignment"); - free(vname); - } else { - coutput(" int %s%s_min = %s;\n", JDF2C_NAMESPACE, vl->name, dump_expr((void**)vl->expr, &info)); - } - } - } else { /* IDs should depend only on the parameters of the * function. However, we might need the other definitions because * the min expression of the parameters might depend on them. If * this is not the case, a quick "(void)" removes the warning. */ - coutput(" (void)%s;\n", vl->name); - } - idx++; - } - - string_arena_init(sa_range_multiplier); - for(vl = f->locals; vl != NULL; vl = vl->next) { - if( local_is_parameter(f, vl) != NULL ) { - coutput(" __parsec_id += (%s - %s%s_min)%s;\n", vl->name, JDF2C_NAMESPACE, vl->name, string_arena_get_string(sa_range_multiplier)); + coutput(" __parsec_id += (assignment->%s.value - __parsec_tp->%s_%s_min)%s;\n", vl->name, f->fname, vl->name, + string_arena_get_string(sa_range_multiplier)); string_arena_add_string(sa_range_multiplier, " * __parsec_tp->%s_%s_range", f->fname, vl->name); } } - coutput(" (void)__parsec_tp;\n" - " return (parsec_key_t)__parsec_id;\n" - "}\n"); + coutput(" return (parsec_key_t)__parsec_id;\n" + "}\n"); } } From 889db226b8d31419d82016287f19700e08ebdad9 Mon Sep 17 00:00:00 2001 From: Joseph John Date: Wed, 8 Feb 2023 03:32:21 -0500 Subject: [PATCH 215/215] change_task_features() simplified --- parsec/mca/device/cuda/device_cuda_migrate.c | 95 ++++++-------------- parsec/mca/device/cuda/device_cuda_module.c | 5 -- 2 files changed, 29 insertions(+), 71 deletions(-) diff --git a/parsec/mca/device/cuda/device_cuda_migrate.c b/parsec/mca/device/cuda/device_cuda_migrate.c index 05d98b619..92be9e63a 100644 --- a/parsec/mca/device/cuda/device_cuda_migrate.c +++ b/parsec/mca/device/cuda/device_cuda_migrate.c @@ -1060,36 +1060,27 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t parsec_data_t *original = task->data[i].data_out->original; 
parsec_atomic_lock(&original->lock); + /** + * We set the current data_out of the task as the possible candidate for this flow of the task. + * This will allow us to easily find the current data_out as the data_in for the next stage_in + * ( in the function parsec_gpu_data_stage_in()). + */ + gpu_task->candidate[i] = task->data[i].data_out; + /** Read only access to data_out*/ if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && !(PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { assert(task->data[i].data_out->readers > 0); - /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. - */ - gpu_task->candidate[i] = task->data[i].data_out; + /** * As the data has atleast one reader, one of which is this task being - * migrated, this data will not be evicted. So this is atleast in the - * list gpu_mem_owned_lru. If some other task is writing to it (which - * should not happen) this will not be in any tracking list, but the tasks - * that is writing to it will eventually add it to the appropriate tracking - * list during parsec_cuda_kernel_epilog. - */ - - - /** The data in used in the first stage-in may have been evicted. - * But we store the original data_in and that can be used for the stage_in. + * migrated, this data will not be evicted. If this is not part of any LRU + * (due to the orpahing mechanism in parsec_gpu_data_reserve_device_space() + * it will eventually add it to the appropriate tracking + * list during parsec_gpu_callback_complete_push(). 
*/ - assert(gpu_task->original_data_in[i] != NULL); - if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) - { - task->data[i].data_in = gpu_task->original_data_in[i]; - } } @@ -1102,8 +1093,8 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t /** * @brief When the data is write access, is not in any tracking lists. - * As we won't need this data (as the data is write only and only - * this task is supposed to be write to this data) and it doesnt have any other readers + * As we won't need this data (as the data is write only and only this + * task is supposed to be write to this data) and it doesnt have any other readers * we can add it to the gpu_mem_lru for eviction. * * But we decided that as D2D trasfers are more fast, compared to H2D transfers. @@ -1111,67 +1102,39 @@ int change_task_features(parsec_gpu_task_t *gpu_task, parsec_device_gpu_module_t * As this data has no reader this may endup being evicted during zone_malloc(). * So we increase the reader for this data. This reader will get decremented * after the second stage-in. + * + * The data will be moved to the gpu_mem_lru when parsec_gpu_callback_complete_push + * have been called after the second stage-in. */ - gpu_task->candidate[i] = task->data[i].data_out; - PARSEC_DATA_COPY_INC_READERS_ATOMIC(task->data[i].data_out); - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); + PARSEC_DATA_COPY_INC_READERS_ATOMIC(task->data[i].data_out); - /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can be used for the stage_in. - * As the flow is write-only we dont care about the version of the data. 
- */ - assert(gpu_task->original_data_in[i] != NULL); - if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) - { - task->data[i].data_in = gpu_task->original_data_in[i]; - } } /** Read and write access to data_out*/ else if ((PARSEC_FLOW_ACCESS_READ & gpu_task->flow[i]->flow_flags) && (PARSEC_FLOW_ACCESS_WRITE & gpu_task->flow[i]->flow_flags)) { - assert(task->data[i].data_out->readers == 1); /** - * we set a possible candidate for this flow of the task. This will allow - * us to easily find the stage_in data as the possible candidate in - * parsec_gpu_data_stage_in() function. - */ - gpu_task->candidate[i] = task->data[i].data_out; - - /** - * @brief As the task has write access, it is not in any tracking lists. - * As the data has atleast one reader, one of which is this task being - * migrated, this data will not be evicted. So we add it to gpu_mem_lru. - * As this task has write access, no ther task has write access to it. - * So when we add it to gpu_mem_lru, we are not in danger of prematurely - * evicting the data, to whichn some other task has read or write access. - */ - parsec_list_item_ring_chop((parsec_list_item_t *)task->data[i].data_out); - PARSEC_LIST_ITEM_SINGLETON(task->data[i].data_out); - parsec_list_push_back(&dealer_device->gpu_mem_lru, (parsec_list_item_t *)task->data[i].data_out); - - /** The data in used in the first stage-in may have been released. - * But we store the original data_in and that can be used for the stage_in. + * @brief When the data is write access, is not in any tracking lists. + * + * The data will be moved to the gpu_mem_lru when parsec_gpu_callback_complete_push + * have been called after the second stage-in. 
*/ - assert(gpu_task->original_data_in[i] != NULL); - if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) - { - task->data[i].data_in = gpu_task->original_data_in[i]; - } + assert(task->data[i].data_out->readers == 1); } else { assert(0); } - if( task->data[i].data_out->push_task == task ) + /** + * The data in used in the first stage-in may have been evicted. + * But we store the original data_in and that can be used for the stage_in. + */ + if( gpu_task->original_data_in[i] != NULL && task->data[i].data_in == NULL) { - printf("ERROR 2 \n"); - exit(0); + task->data[i].data_in = gpu_task->original_data_in[i]; } PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 13c859dbc..5e330ed85 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -882,11 +882,6 @@ parsec_cuda_flush_lru( parsec_device_module_t *device ) parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_lru); parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_mem_owned_lru); parsec_cuda_memory_release_list(cuda_device, &gpu_device->gpu_tmp_lru); - //if(parsec_list_is_empty( &gpu_device->gpu_tmp_lru )) - //{ - // printf("ERROR \n"); - // exit( 0 ); - //} #if !defined(PARSEC_GPU_CUDA_ALLOC_PER_TILE) && !defined(_NDEBUG)