Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions parsec/mca/device/device_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ static int parsec_gpu_profiling_initiated = 0;
int parsec_gpu_output_stream = -1;
int parsec_gpu_verbosity;

/* The return value of these functions is either a parsec_hook_return_t for <= 0 values,
* or a positive number which represents that something has been scheduled on the gpu_stream
*/
typedef int(*parsec_gpu_step_function_t)(parsec_device_gpu_module_t *gpu_device,
parsec_gpu_task_t *gpu_task,
parsec_gpu_exec_stream_t *gpu_stream);

static inline int
parsec_device_check_space_needed(parsec_device_gpu_module_t *gpu_device,
parsec_gpu_task_t *gpu_task)
Expand Down Expand Up @@ -1461,7 +1468,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device,
PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,
"GPU[%d:%s]:\tThere is a potential alternative source for data_in %p [ref_count %d] in original %p to go in copy %p [ref_count %d], but it is not ready, falling back on CPU source",
gpu_device->super.device_index, gpu_device->super.name, task_data->data_in, task_data->data_in->super.super.obj_reference_count, original, gpu_elem, gpu_elem->super.super.obj_reference_count);
//return PARSEC_HOOK_RETURN_NEXT;
//return PARSEC_HOOK_RETURN_AGAIN;
}

/* We fall back on the CPU copy */
Expand Down Expand Up @@ -1878,7 +1885,7 @@ parsec_device_callback_complete_push(parsec_device_gpu_module_t *gpu_device,
static inline int
parsec_device_progress_stream( parsec_device_gpu_module_t* gpu_device,
parsec_gpu_exec_stream_t* stream,
parsec_advance_task_function_t progress_fct,
parsec_gpu_step_function_t progress_fct,
parsec_gpu_task_t* task,
parsec_gpu_task_t** out_task )
{
Expand Down Expand Up @@ -1937,6 +1944,7 @@ parsec_device_progress_stream( parsec_device_gpu_module_t* gpu_device,
}

grab_a_task:
assert(NULL == task);
if( NULL == stream->tasks[stream->start] ) { /* there is room on the stream */
task = (parsec_gpu_task_t*)parsec_list_pop_front(stream->fifo_pending); /* get the best task */
}
Expand All @@ -1949,6 +1957,20 @@ parsec_device_progress_stream( parsec_device_gpu_module_t* gpu_device,

schedule_task:
rc = progress_fct( gpu_device, task, stream );
if(0 == rc) {
#if defined(PARSEC_PROF_TRACE)
if( stream->prof_event_track_enable ) {
if( task->prof_key_end != -1 ) {
PARSEC_PROFILING_TRACE(stream->profiling, task->prof_key_end, task->prof_event_id, task->prof_tp_id, NULL);
}
}
#endif
/* If progress_fct added nothing on that stream, we skip scheduling a record on the GPU stream */
if( task->complete_stage )
rc = task->complete_stage(gpu_device, &task, stream);
*out_task = task;
return rc;
}
if( 0 > rc ) {
if( PARSEC_HOOK_RETURN_AGAIN != rc ) {
if( PARSEC_HOOK_RETURN_NEXT == rc ) {
Expand Down Expand Up @@ -1978,7 +2000,6 @@ parsec_device_progress_stream( parsec_device_gpu_module_t* gpu_device,
"trigger the task will be handled accordingly",
gpu_device->super.device_index, gpu_device->super.name, (void*)task);
}
task->last_status = rc;
/**
* Do not skip the gpu event generation. The problem is that some of the inputs
* might be in the pipe of being transferred to the GPU. If we activate this task
Expand Down Expand Up @@ -2018,7 +2039,7 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
{
parsec_task_t *this_task = gpu_task->ec;
const parsec_flow_t *flow;
int i, ret = 0;
int i, ret = 0, how_many = 0;
#if defined(PARSEC_DEBUG_NOISIER)
char tmp[MAX_TASK_STRLEN];
#endif
Expand Down Expand Up @@ -2089,6 +2110,7 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
gpu_task->last_status = ret;
return ret;
}
how_many += ret;
}

PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,
Expand All @@ -2099,7 +2121,7 @@ parsec_device_kernel_push( parsec_device_gpu_module_t *gpu_device,
#if defined(PARSEC_PROF_TRACE)
gpu_task->prof_key_end = -1; /* We do not log that event as the completion of this task */
#endif
return PARSEC_HOOK_RETURN_DONE;
return how_many;
}

/**
Expand All @@ -2116,6 +2138,7 @@ parsec_device_kernel_exec( parsec_device_gpu_module_t *gpu_device,
{
parsec_advance_task_function_t progress_fct = gpu_task->submit;
parsec_task_t* this_task = gpu_task->ec;
int rc;

#if defined(PARSEC_DEBUG_NOISIER)
char tmp[MAX_TASK_STRLEN];
Expand Down Expand Up @@ -2152,7 +2175,9 @@ parsec_device_kernel_exec( parsec_device_gpu_module_t *gpu_device,
#endif /* defined(PARSEC_DEBUG_PARANOID) */

(void)this_task;
return progress_fct( gpu_device, gpu_task, gpu_stream );
rc = progress_fct( gpu_device, gpu_task, gpu_stream );
gpu_task->last_status = rc;
return 1;
}

/**
Expand Down Expand Up @@ -2193,8 +2218,9 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device,
return_code = PARSEC_HOOK_RETURN_DISABLE;
goto release_and_return_error;
}
how_many++;
}
return PARSEC_HOOK_RETURN_DONE;
return how_many;
}

PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,
Expand Down