-
Notifications
You must be signed in to change notification settings - Fork 103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TL/UCP: Allow self copy in allgather using network loopback #1021
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,3 +58,37 @@ char *ucc_tl_ucp_allgather_score_str_get(ucc_tl_ucp_team_t *team) | |
UCC_TL_UCP_ALLGATHER_DEFAULT_ALG_SELECT_STR, algo_num); | ||
return str; | ||
} | ||
|
||
ucc_status_t loopback_self_copy(void *rbuf, void *sbuf, size_t data_size, | ||
ucc_memory_type_t rmem, ucc_memory_type_t smem, | ||
ucc_rank_t rank, ucc_tl_ucp_team_t *team, | ||
ucc_tl_ucp_task_t *task) | ||
{ | ||
ucc_status_t status; | ||
status = ucc_tl_ucp_send_nb(sbuf, data_size, smem, rank, team, task); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return task->super.status; | ||
} | ||
status = ucc_tl_ucp_recv_nb(rbuf, data_size, rmem, rank, team, task); | ||
if (UCC_OK != status) { | ||
task->super.status = status; | ||
return task->super.status; | ||
} | ||
return UCC_OK; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
ucc_status_t allgather_copy(void *rbuf, void *sbuf, size_t data_size, | ||
ucc_memory_type_t rmem, ucc_memory_type_t smem, | ||
ucc_rank_t rank, ucc_tl_ucp_team_t *team, | ||
ucc_tl_ucp_task_t *task) | ||
{ | ||
ucc_status_t status; | ||
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback; | ||
if (use_loopback) { | ||
status = loopback_self_copy(rbuf, sbuf, data_size, rmem, smem, rank, | ||
team, task); | ||
} else { | ||
status = ucc_mc_memcpy(rbuf, sbuf, data_size, rmem, smem); | ||
} | ||
return status; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
#include "coll_patterns/sra_knomial.h" | ||
#include "utils/ucc_math.h" | ||
#include "utils/ucc_coll_utils.h" | ||
#include "allgather.h" | ||
|
||
#define SAVE_STATE(_phase) \ | ||
do { \ | ||
|
@@ -54,8 +55,7 @@ | |
|
||
void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | ||
{ | ||
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, | ||
ucc_tl_ucp_task_t); | ||
ucc_tl_ucp_task_t * task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. *task |
||
ucc_coll_args_t *args = &TASK_ARGS(task); | ||
ucc_tl_ucp_team_t *team = TASK_TEAM(task); | ||
ucc_kn_radix_t radix = task->allgather_kn.p.radix; | ||
|
@@ -66,10 +66,10 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
size_t dt_size = ucc_dt_size(GET_DT(args)); | ||
ucc_rank_t size = task->subset.map.ep_num; | ||
size_t data_size = GET_TOTAL_COUNT(args, size); | ||
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? | ||
args->root : 0; | ||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? args->root : 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. align = |
||
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); | ||
size_t local = GET_LOCAL_COUNT(args, size, rank); | ||
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback; | ||
void *sbuf; | ||
ptrdiff_t peer_seg_offset, local_seg_offset; | ||
ucc_rank_t peer, peer_dist; | ||
|
@@ -78,8 +78,14 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) | |
ucc_status_t status; | ||
size_t extra_count; | ||
|
||
EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test", | ||
task->allgather_kn.etask); | ||
if (use_loopback) { | ||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
return; | ||
} | ||
} else { | ||
EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test", | ||
task->allgather_kn.etask); | ||
} | ||
task->allgather_kn.etask = NULL; | ||
UCC_KN_GOTO_PHASE(task->allgather_kn.phase); | ||
if (KN_NODE_EXTRA == node_type) { | ||
|
@@ -209,6 +215,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
ct == UCC_COLL_TYPE_BCAST ? | ||
args->root : 0, size); | ||
ucc_ee_executor_task_args_t eargs = {0}; | ||
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback; | ||
ucc_status_t status; | ||
ptrdiff_t offset; | ||
ucc_ee_executor_t *exec; | ||
|
@@ -225,21 +232,34 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) | |
ucc_dt_size(args->dst.info.datatype); | ||
rbuf = args->dst.info.buffer; | ||
if (!UCC_IS_INPLACE(*args)) { | ||
status = ucc_coll_task_get_executor(&task->super, &exec); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
} | ||
eargs.task_type = UCC_EE_EXECUTOR_TASK_COPY; | ||
eargs.copy.dst = PTR_OFFSET(args->dst.info.buffer, offset); | ||
eargs.copy.src = args->src.info.buffer; | ||
eargs.copy.len = args->src.info.count * | ||
ucc_dt_size(args->src.info.datatype); | ||
status = ucc_ee_executor_task_post(exec, &eargs, | ||
&task->allgather_kn.etask); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
if (use_loopback) { | ||
status = loopback_self_copy( | ||
PTR_OFFSET(args->dst.info.buffer, offset), | ||
args->src.info.buffer, | ||
args->src.info.count * ucc_dt_size(args->src.info.datatype), | ||
args->dst.info.mem_type, args->src.info.mem_type, rank, | ||
team, task); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
return status; | ||
} | ||
} else { | ||
/* Executer */ | ||
status = ucc_coll_task_get_executor(&task->super, &exec); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
} | ||
eargs.task_type = UCC_EE_EXECUTOR_TASK_COPY; | ||
eargs.copy.dst = PTR_OFFSET(args->dst.info.buffer, offset); | ||
eargs.copy.src = args->src.info.buffer; | ||
eargs.copy.len = | ||
args->src.info.count * ucc_dt_size(args->src.info.datatype); | ||
status = ucc_ee_executor_task_post(exec, &eargs, | ||
&task->allgather_kn.etask); | ||
if (ucc_unlikely(status != UCC_OK)) { | ||
task->super.status = status; | ||
return status; | ||
} | ||
} | ||
} | ||
} else if (ct == UCC_COLL_TYPE_ALLGATHERV) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,9 +81,11 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task) | |
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype; | ||
size_t count = TASK_ARGS(task).dst.info.count; | ||
size_t data_size = (count / tsize) * ucc_dt_size(dt); | ||
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback; | ||
ucc_rank_t neighbors[2], i; | ||
int i_parity, even_rank; | ||
void *tmprecv, *tmpsend; | ||
int counter; | ||
|
||
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
return; | ||
|
@@ -98,8 +100,13 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task) | |
neighbors[1] = (trank + 1) % tsize; | ||
} | ||
|
||
while (task->tagged.send_posted < (tsize / 2)) { | ||
i = task->tagged.send_posted; | ||
if ((!UCC_IS_INPLACE(TASK_ARGS(task))) && use_loopback) { | ||
counter = task->tagged.send_posted - 1; | ||
} else { | ||
counter = task->tagged.send_posted; | ||
} | ||
while (counter < (tsize / 2)) { | ||
i = counter; | ||
i_parity = i % 2; | ||
|
||
tmprecv = | ||
|
@@ -118,6 +125,11 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task) | |
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
return; | ||
} | ||
if ((!UCC_IS_INPLACE(TASK_ARGS(task))) && use_loopback) { | ||
counter = task->tagged.send_posted - 1; | ||
} else { | ||
counter = task->tagged.send_posted; | ||
} | ||
} | ||
|
||
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); | ||
|
@@ -150,13 +162,15 @@ ucc_status_t ucc_tl_ucp_allgather_neighbor_start(ucc_coll_task_t *coll_task) | |
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); | ||
|
||
if (!UCC_IS_INPLACE(TASK_ARGS(task))) { | ||
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf, | ||
data_size, rmem, smem); | ||
status = allgather_copy(PTR_OFFSET(rbuf, data_size * trank), sbuf, | ||
data_size, rmem, smem, trank, team, task); | ||
if (ucc_unlikely(UCC_OK != status)) { | ||
return status; | ||
} | ||
} | ||
|
||
while (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. just checking: is this call safe in the "inplace" case? |
||
} | ||
if (trank % 2) { | ||
neighbor = (trank - 1 + tsize) % tsize; | ||
} else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,15 +31,16 @@ static ucc_rank_t ucc_tl_ucp_allgather_ring_get_recv_block(ucc_subset_t *subset, | |
|
||
void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task) | ||
{ | ||
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); | ||
ucc_tl_ucp_team_t *team = TASK_TEAM(task); | ||
ucc_rank_t trank = task->subset.myrank; | ||
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num; | ||
void *rbuf = TASK_ARGS(task).dst.info.buffer; | ||
ucc_memory_type_t rmem = TASK_ARGS(task).dst.info.mem_type; | ||
size_t count = TASK_ARGS(task).dst.info.count; | ||
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype; | ||
size_t data_size = (count / tsize) * ucc_dt_size(dt); | ||
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); | ||
ucc_tl_ucp_team_t *team = TASK_TEAM(task); | ||
ucc_rank_t trank = task->subset.myrank; | ||
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num; | ||
void * rbuf = TASK_ARGS(task).dst.info.buffer; | ||
ucc_memory_type_t rmem = TASK_ARGS(task).dst.info.mem_type; | ||
size_t count = TASK_ARGS(task).dst.info.count; | ||
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype; | ||
size_t data_size = (count / tsize) * ucc_dt_size(dt); | ||
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback; | ||
ucc_rank_t sendto, recvfrom, sblock, rblock; | ||
int step; | ||
void *buf; | ||
|
@@ -49,9 +50,10 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task) | |
} | ||
sendto = ucc_ep_map_eval(task->subset.map, (trank + 1) % tsize); | ||
recvfrom = ucc_ep_map_eval(task->subset.map, (trank - 1 + tsize) % tsize); | ||
step = | ||
use_loopback ? task->tagged.send_posted - 1 : task->tagged.send_posted; | ||
|
||
while (task->tagged.send_posted < tsize - 1) { | ||
step = task->tagged.send_posted; | ||
while (step < tsize - 1) { | ||
sblock = task->allgather_ring.get_send_block(&task->subset, trank, | ||
tsize, step); | ||
rblock = task->allgather_ring.get_recv_block(&task->subset, trank, | ||
|
@@ -67,6 +69,8 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task) | |
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) { | ||
return; | ||
} | ||
step = use_loopback ? task->tagged.send_posted - 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this line check for inplace as well? If use_loopback is true and inplace is false, will step be incorrect? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar for neighbor exchange |
||
: task->tagged.send_posted; | ||
} | ||
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); | ||
task->super.status = UCC_OK; | ||
|
@@ -86,6 +90,7 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task) | |
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype; | ||
ucc_rank_t trank = task->subset.myrank; | ||
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num; | ||
ucc_rank_t rank = ucc_ep_map_eval(task->subset.map, trank); | ||
size_t data_size = (count / tsize) * ucc_dt_size(dt); | ||
ucc_status_t status; | ||
ucc_rank_t block; | ||
|
@@ -96,13 +101,12 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task) | |
if (!UCC_IS_INPLACE(TASK_ARGS(task))) { | ||
block = task->allgather_ring.get_send_block(&task->subset, trank, tsize, | ||
0); | ||
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * block), | ||
sbuf, data_size, rmem, smem); | ||
status = allgather_copy(PTR_OFFSET(rbuf, data_size * block), sbuf, | ||
data_size, rmem, smem, rank, team, task); | ||
if (ucc_unlikely(UCC_OK != status)) { | ||
return status; | ||
} | ||
} | ||
|
||
return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -48,7 +48,7 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = { | |||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, alltoallv_pairwise_num_posts), | ||||||||
UCC_CONFIG_TYPE_ULUNITS}, | ||||||||
|
||||||||
/* TODO: add radix to config once it's fully supported by the algorithm | ||||||||
/* TODO: add radix to config once it's fully supported by the algorithm | ||||||||
{"ALLTOALLV_HYBRID_RADIX", "2", | ||||||||
"Radix of the Hybrid Alltoallv algorithm", | ||||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, alltoallv_hybrid_radix), | ||||||||
|
@@ -140,6 +140,12 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = { | |||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, allgather_kn_radix), | ||||||||
UCC_CONFIG_TYPE_UINT}, | ||||||||
|
||||||||
{"ALLGATHER_USE_LOOPBACK", "0", | ||||||||
"If set to 1 performs network loopback for self copy, otherwise uses mc " | ||||||||
"cuda copy", | ||||||||
Comment on lines +144 to +145:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change — it is not necessarily cuda |
||||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, allgather_use_loopback), | ||||||||
UCC_CONFIG_TYPE_BOOL}, | ||||||||
|
||||||||
{"BCAST_KN_RADIX", "4", "Radix of the recursive-knomial bcast algorithm", | ||||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, bcast_kn_radix), | ||||||||
UCC_CONFIG_TYPE_UINT}, | ||||||||
|
@@ -196,10 +202,8 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = { | |||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, reduce_scatterv_ring_bidirectional), | ||||||||
UCC_CONFIG_TYPE_BOOL}, | ||||||||
|
||||||||
{"USE_TOPO", "try", | ||||||||
"Allow usage of tl ucp topo", | ||||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, use_topo), | ||||||||
UCC_CONFIG_TYPE_TERNARY}, | ||||||||
{"USE_TOPO", "try", "Allow usage of tl ucp topo", | ||||||||
ucc_offsetof(ucc_tl_ucp_lib_config_t, use_topo), UCC_CONFIG_TYPE_TERNARY}, | ||||||||
|
||||||||
{"RANKS_REORDERING", "y", | ||||||||
"Use topology information in TL UCP to reorder ranks. Requires topo info", | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Instead of `status = ...; if (UCC_OK != status) ...` you can use UCC_CHECK_GOTO(..., err)