|
40 | 40 |
|
41 | 41 | #if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) |
42 | 42 | #include "parsec/mca/device/cuda/device_cuda.h" |
| 43 | +#include "parsec/sys/tls.h" |
| 44 | + |
| 45 | +extern PARSEC_TLS_DECLARE(co_manager_tls); |
| 46 | + |
43 | 47 | #endif /* defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) */ |
44 | 48 |
|
45 | 49 | #include "parsec/mca/mca_repository.h" |
@@ -224,8 +228,8 @@ static int parsec_dtd_taskpool_leave_wait(parsec_taskpool_t* tp, void*_) |
224 | 228 | parsec_termdet_open_module(tp, "local"); |
225 | 229 | tp->tdm.module->monitor_taskpool(tp, parsec_taskpool_termination_detected); |
226 | 230 | tp->tdm.module->taskpool_set_nb_tasks(tp, 0); |
227 | | - tp->tdm.module->taskpool_set_runtime_actions(tp, 0); |
228 | | - |
| 231 | + tp->tdm.module->taskpool_set_runtime_actions(tp, 0); |
| 232 | + |
229 | 233 | /* We are re-attached to the context */ |
230 | 234 | parsec_atomic_fetch_inc_int32(&tp->context->active_taskpools); |
231 | 235 | return PARSEC_SUCCESS; |
@@ -721,7 +725,7 @@ parsec_dtd_add_profiling_info(parsec_taskpool_t *tp, |
721 | 725 | } |
722 | 726 |
|
723 | 727 | void |
724 | | -parsec_dtd_add_profiling_info_generic(parsec_taskpool_t *tp, |
| 728 | +parsec_dtd_add_profiling_info_generic(parsec_taskpool_t *tp, |
725 | 729 | const char *name, |
726 | 730 | int *keyin, int *keyout) |
727 | 731 | { |
@@ -1002,7 +1006,7 @@ parsec_dtd_insert_task_class(parsec_dtd_taskpool_t *tp, |
1002 | 1006 | } else { |
1003 | 1007 | char *fc = fill_color(tc->super.task_class_id, PARSEC_DTD_NB_TASK_CLASSES); |
1004 | 1008 | parsec_profiling_add_dictionary_keyword(tc->super.name, fc, |
1005 | | - sizeof(parsec_task_prof_info_t)+info_size, |
| 1009 | + sizeof(parsec_task_prof_info_t)+info_size, |
1006 | 1010 | info_str, |
1007 | 1011 | (int *)&PARSEC_PROF_FUNC_KEY_START(&tp->super, tc->super.task_class_id), |
1008 | 1012 | (int *)&PARSEC_PROF_FUNC_KEY_END(&tp->super, tc->super.task_class_id)); |
@@ -1789,9 +1793,63 @@ parsec_dtd_release_deps(parsec_execution_stream_t *es, |
1789 | 1793 |
|
1790 | 1794 | /* Scheduling tasks */ |
1791 | 1795 | if( action_mask & PARSEC_ACTION_RELEASE_LOCAL_DEPS ) { |
1792 | | - __parsec_schedule_vp(es, arg.ready_lists, 0); |
1793 | | - } |
| 1796 | + int nb_task_rings = es->virtual_process->parsec_context->nb_vp; |
| 1797 | + |
| 1798 | + /* Iterating through the task rings */ |
| 1799 | + for(int vp = 0; vp < nb_task_rings; vp++ ){ |
| 1800 | + const parsec_vp_t** vps = (const parsec_vp_t**)es->virtual_process->parsec_context->virtual_processes; |
| 1801 | + parsec_execution_stream_t* target_es = vps[vp]->execution_streams[0]; |
| 1802 | +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) |
| 1803 | + parsec_device_module_t** co_manager_tls_val = PARSEC_TLS_GET_SPECIFIC(co_manager_tls); |
| 1804 | + |
| 1805 | + if( co_manager_tls_val != NULL ) { |
| 1806 | + /* I am the co-manager */ |
| 1807 | + |
| 1808 | + parsec_task_t* task_ring = arg.ready_lists[vp]; |
| 1809 | + parsec_task_t* current_task = task_ring; |
| 1810 | +#if defined(PARSEC_DEBUG_NOISIER) |
| 1811 | + char tmp[MAX_TASK_STRLEN]; |
| 1812 | +#endif |
| 1813 | + /* iterate through the single tasks */ |
| 1814 | + while ( task_ring != NULL ) |
| 1815 | + { |
| 1816 | +                task_ring = (parsec_task_t*)parsec_list_item_ring_chop( &current_task->super ); |
| 1817 | + parsec_list_item_singleton( (parsec_list_item_t*)current_task ); |
| 1818 | + |
| 1819 | + if (PARSEC_DTD_FLUSH_TC_ID == current_task->task_class->task_class_id) |
| 1820 | + { |
| 1821 | + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,"GPU[%s]: Thread %d scheduling task %s at %s:%d", |
| 1822 | + ((parsec_device_module_t*)*co_manager_tls_val)->name, es->th_id, |
| 1823 | + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, current_task), __FILE__, __LINE__); |
| 1824 | + __parsec_schedule(target_es, current_task, 0); |
| 1825 | + } |
| 1826 | + else |
| 1827 | + { |
| 1828 | + /* try to skip the scheduler */ |
| 1829 | + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,"GPU[%s]: Thread %d try executing task %s %p at %s:%d", |
| 1830 | + ((parsec_device_module_t*)*co_manager_tls_val)->name, es->th_id, |
| 1831 | + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, current_task), current_task, __FILE__, __LINE__); |
| 1832 | + int rc = __parsec_execute(target_es, current_task); |
| 1833 | + if( rc != PARSEC_HOOK_RETURN_ASYNC ){ |
| 1834 | + /* failed to shortcut, scheduling normally */ |
| 1835 | + PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream,"GPU[%s]: Thread %d resort to scheduling task %s %p at %s:%d", |
| 1836 | + ((parsec_device_module_t*)*co_manager_tls_val)->name, es->th_id, |
| 1837 | + parsec_task_snprintf(tmp, MAX_TASK_STRLEN, current_task), current_task, __FILE__, __LINE__); |
| 1838 | + __parsec_schedule(target_es, current_task, 0); |
| 1839 | + } |
1794 | 1840 |
|
| 1841 | + } |
| 1842 | + current_task = task_ring; |
| 1843 | + } |
| 1844 | + arg.ready_lists[vp] = NULL; |
| 1845 | + } |
| 1846 | +#endif /* PARSEC_HAVE_DEV_CUDA_SUPPORT */ |
| 1847 | + if(arg.ready_lists[vp] != NULL ) { |
| 1848 | + __parsec_schedule(target_es, arg.ready_lists[vp], 0); |
| 1849 | + arg.ready_lists[vp] = NULL; |
| 1850 | + } |
| 1851 | + } |
| 1852 | + } |
1795 | 1853 | PARSEC_PINS(es, RELEASE_DEPS_END, this_task); |
1796 | 1854 | return 0; |
1797 | 1855 | } |
@@ -2127,7 +2185,7 @@ parsec_dtd_create_task_classv(const char *name, |
2127 | 2185 | (flow_count * sizeof(parsec_dtd_descendant_info_t)) + |
2128 | 2186 | (flow_count * sizeof(parsec_dtd_flow_info_t)) + |
2129 | 2187 | (nb_params * sizeof(parsec_dtd_task_param_t)) + |
2130 | | - total_size_of_param); |
| 2188 | + total_size_of_param); |
2131 | 2189 |
|
2132 | 2190 | parsec_mempool_construct(&dtd_tc->context_mempool, |
2133 | 2191 | PARSEC_OBJ_CLASS(parsec_dtd_task_t), total_size, |
|
0 commit comments