Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TL/UCP: Allow self copy in allgather using network loopback #1021

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/components/tl/ucp/allgather/allgather.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,37 @@ char *ucc_tl_ucp_allgather_score_str_get(ucc_tl_ucp_team_t *team)
UCC_TL_UCP_ALLGATHER_DEFAULT_ALG_SELECT_STR, algo_num);
return str;
}

ucc_status_t loopback_self_copy(void *rbuf, void *sbuf, size_t data_size,
ucc_memory_type_t rmem, ucc_memory_type_t smem,
ucc_rank_t rank, ucc_tl_ucp_team_t *team,
ucc_tl_ucp_task_t *task)
{
ucc_status_t status;
status = ucc_tl_ucp_send_nb(sbuf, data_size, smem, rank, team, task);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of status = ... ; if (UCC_OK != status) ... you can use UCC_CHECK_GOTO(..., err)

if (UCC_OK != status) {
task->super.status = status;
return task->super.status;
}
status = ucc_tl_ucp_recv_nb(rbuf, data_size, rmem, rank, team, task);
if (UCC_OK != status) {
task->super.status = status;
return task->super.status;
}
return UCC_OK;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

err:
    return status;

}
/*
 * Copy the local contribution from sbuf into rbuf, choosing the copy
 * mechanism from the TL/UCP lib configuration.
 *
 * If cfg.allgather_use_loopback is non-zero the copy is handed to
 * loopback_self_copy(), which receives every argument of this function.
 * Otherwise ucc_mc_memcpy() is called with the buffers, the size and the
 * two memory types; rank and task are not used on that path.
 *
 * Returns the status of whichever call was made, unchanged.
 *
 * NOTE(review): the loopback path is built on *_nb send/recv calls, so the
 * data has presumably not landed in rbuf when this returns - confirm that
 * callers test the task for completion.
 */
ucc_status_t allgather_copy(void *rbuf, void *sbuf, size_t data_size,
                            ucc_memory_type_t rmem, ucc_memory_type_t smem,
                            ucc_rank_t rank, ucc_tl_ucp_team_t *team,
                            ucc_tl_ucp_task_t *task)
{
    if (UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback) {
        /* Self copy through the network loopback path */
        return loopback_self_copy(rbuf, sbuf, data_size, rmem, smem, rank,
                                  team, task);
    }

    /* Self copy through the memory component */
    return ucc_mc_memcpy(rbuf, sbuf, data_size, rmem, smem);
}
11 changes: 11 additions & 0 deletions src/components/tl/ucp/allgather/allgather.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define ALLGATHER_H_
#include "../tl_ucp.h"
#include "../tl_ucp_coll.h"
#include "tl_ucp_sendrecv.h"

enum {
UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL,
Expand Down Expand Up @@ -38,6 +39,16 @@ static inline int ucc_tl_ucp_allgather_alg_from_str(const char *str)

ucc_status_t ucc_tl_ucp_allgather_init(ucc_tl_ucp_task_t *task);

/*
 * Self copy implemented as a send/recv pair addressed to the same rank.
 *
 * Posts ucc_tl_ucp_send_nb() for data_size bytes of sbuf (memory type smem)
 * to `rank`, then ucc_tl_ucp_recv_nb() for data_size bytes into rbuf
 * (memory type rmem) from `rank`, both on the given team and task.
 *
 * Returns UCC_OK when both posts succeed. If either post fails, the error is
 * stored in task->super.status and returned; the recv is not posted when the
 * send fails.
 *
 * NOTE(review): both calls are *_nb, so completion presumably has to be
 * checked on the task by the caller - confirm.
 */
ucc_status_t loopback_self_copy(void *rbuf, void *sbuf, size_t data_size,
ucc_memory_type_t rmem, ucc_memory_type_t smem,
ucc_rank_t rank, ucc_tl_ucp_team_t *team,
ucc_tl_ucp_task_t *task);

/*
 * Copy data_size bytes from sbuf (memory type smem) to rbuf (memory type
 * rmem) for the allgather self-copy step.
 *
 * Uses loopback_self_copy() when the lib config field allgather_use_loopback
 * is non-zero, and ucc_mc_memcpy() otherwise. rank and task are only
 * consumed by the loopback path.
 *
 * Returns the status reported by the selected copy routine.
 */
ucc_status_t allgather_copy(void *rbuf, void *sbuf, size_t data_size,
ucc_memory_type_t rmem, ucc_memory_type_t smem,
ucc_rank_t rank, ucc_tl_ucp_team_t *team,
ucc_tl_ucp_task_t *task);

/* Ring */
ucc_status_t ucc_tl_ucp_allgather_ring_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand Down
66 changes: 43 additions & 23 deletions src/components/tl/ucp/allgather/allgather_knomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "coll_patterns/sra_knomial.h"
#include "utils/ucc_math.h"
#include "utils/ucc_coll_utils.h"
#include "allgather.h"

#define SAVE_STATE(_phase) \
do { \
Expand Down Expand Up @@ -54,8 +55,7 @@

void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
{
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task,
ucc_tl_ucp_task_t);
ucc_tl_ucp_task_t * task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*task

ucc_coll_args_t *args = &TASK_ARGS(task);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
ucc_kn_radix_t radix = task->allgather_kn.p.radix;
Expand All @@ -66,10 +66,10 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
size_t dt_size = ucc_dt_size(GET_DT(args));
ucc_rank_t size = task->subset.map.ep_num;
size_t data_size = GET_TOTAL_COUNT(args, size);
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ?
args->root : 0;
ucc_rank_t rank = VRANK(task->subset.myrank, broot, size);
size_t local = GET_LOCAL_COUNT(args, size, rank);
ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? args->root : 0;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

align =

ucc_rank_t rank = VRANK(task->subset.myrank, broot, size);
size_t local = GET_LOCAL_COUNT(args, size, rank);
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback;
void *sbuf;
ptrdiff_t peer_seg_offset, local_seg_offset;
ucc_rank_t peer, peer_dist;
Expand All @@ -78,8 +78,14 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
ucc_status_t status;
size_t extra_count;

EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test",
task->allgather_kn.etask);
if (use_loopback) {
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
return;
}
} else {
EXEC_TASK_TEST(UCC_KN_PHASE_INIT, "failed during ee task test",
task->allgather_kn.etask);
}
task->allgather_kn.etask = NULL;
UCC_KN_GOTO_PHASE(task->allgather_kn.phase);
if (KN_NODE_EXTRA == node_type) {
Expand Down Expand Up @@ -209,6 +215,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
ct == UCC_COLL_TYPE_BCAST ?
args->root : 0, size);
ucc_ee_executor_task_args_t eargs = {0};
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback;
ucc_status_t status;
ptrdiff_t offset;
ucc_ee_executor_t *exec;
Expand All @@ -225,21 +232,34 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
ucc_dt_size(args->dst.info.datatype);
rbuf = args->dst.info.buffer;
if (!UCC_IS_INPLACE(*args)) {
status = ucc_coll_task_get_executor(&task->super, &exec);
if (ucc_unlikely(status != UCC_OK)) {
task->super.status = status;
return status;
}
eargs.task_type = UCC_EE_EXECUTOR_TASK_COPY;
eargs.copy.dst = PTR_OFFSET(args->dst.info.buffer, offset);
eargs.copy.src = args->src.info.buffer;
eargs.copy.len = args->src.info.count *
ucc_dt_size(args->src.info.datatype);
status = ucc_ee_executor_task_post(exec, &eargs,
&task->allgather_kn.etask);
if (ucc_unlikely(status != UCC_OK)) {
task->super.status = status;
return status;
if (use_loopback) {
status = loopback_self_copy(
PTR_OFFSET(args->dst.info.buffer, offset),
args->src.info.buffer,
args->src.info.count * ucc_dt_size(args->src.info.datatype),
args->dst.info.mem_type, args->src.info.mem_type, rank,
team, task);
if (ucc_unlikely(status != UCC_OK)) {
return status;
}
} else {
/* Executor */
status = ucc_coll_task_get_executor(&task->super, &exec);
if (ucc_unlikely(status != UCC_OK)) {
task->super.status = status;
return status;
}
eargs.task_type = UCC_EE_EXECUTOR_TASK_COPY;
eargs.copy.dst = PTR_OFFSET(args->dst.info.buffer, offset);
eargs.copy.src = args->src.info.buffer;
eargs.copy.len =
args->src.info.count * ucc_dt_size(args->src.info.datatype);
status = ucc_ee_executor_task_post(exec, &eargs,
&task->allgather_kn.etask);
if (ucc_unlikely(status != UCC_OK)) {
task->super.status = status;
return status;
}
}
}
} else if (ct == UCC_COLL_TYPE_ALLGATHERV) {
Expand Down
22 changes: 18 additions & 4 deletions src/components/tl/ucp/allgather/allgather_neighbor.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,11 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task)
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype;
size_t count = TASK_ARGS(task).dst.info.count;
size_t data_size = (count / tsize) * ucc_dt_size(dt);
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback;
ucc_rank_t neighbors[2], i;
int i_parity, even_rank;
void *tmprecv, *tmpsend;
int counter;

if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
return;
Expand All @@ -98,8 +100,13 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task)
neighbors[1] = (trank + 1) % tsize;
}

while (task->tagged.send_posted < (tsize / 2)) {
i = task->tagged.send_posted;
if ((!UCC_IS_INPLACE(TASK_ARGS(task))) && use_loopback) {
counter = task->tagged.send_posted - 1;
} else {
counter = task->tagged.send_posted;
}
while (counter < (tsize / 2)) {
i = counter;
i_parity = i % 2;

tmprecv =
Expand All @@ -118,6 +125,11 @@ void ucc_tl_ucp_allgather_neighbor_progress(ucc_coll_task_t *coll_task)
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
return;
}
if ((!UCC_IS_INPLACE(TASK_ARGS(task))) && use_loopback) {
counter = task->tagged.send_posted - 1;
} else {
counter = task->tagged.send_posted;
}
}

ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task));
Expand Down Expand Up @@ -150,13 +162,15 @@ ucc_status_t ucc_tl_ucp_allgather_neighbor_start(ucc_coll_task_t *coll_task)
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);

if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
data_size, rmem, smem);
status = allgather_copy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
data_size, rmem, smem, trank, team, task);
if (ucc_unlikely(UCC_OK != status)) {
return status;
}
}

while (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking: is this call safe in the "inplace" case?

}
if (trank % 2) {
neighbor = (trank - 1 + tsize) % tsize;
} else {
Expand Down
32 changes: 18 additions & 14 deletions src/components/tl/ucp/allgather/allgather_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@ static ucc_rank_t ucc_tl_ucp_allgather_ring_get_recv_block(ucc_subset_t *subset,

void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task)
{
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
ucc_rank_t trank = task->subset.myrank;
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num;
void *rbuf = TASK_ARGS(task).dst.info.buffer;
ucc_memory_type_t rmem = TASK_ARGS(task).dst.info.mem_type;
size_t count = TASK_ARGS(task).dst.info.count;
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype;
size_t data_size = (count / tsize) * ucc_dt_size(dt);
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
ucc_rank_t trank = task->subset.myrank;
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num;
void * rbuf = TASK_ARGS(task).dst.info.buffer;
ucc_memory_type_t rmem = TASK_ARGS(task).dst.info.mem_type;
size_t count = TASK_ARGS(task).dst.info.count;
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype;
size_t data_size = (count / tsize) * ucc_dt_size(dt);
int use_loopback = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_loopback;
ucc_rank_t sendto, recvfrom, sblock, rblock;
int step;
void *buf;
Expand All @@ -49,9 +50,10 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task)
}
sendto = ucc_ep_map_eval(task->subset.map, (trank + 1) % tsize);
recvfrom = ucc_ep_map_eval(task->subset.map, (trank - 1 + tsize) % tsize);
step =
use_loopback ? task->tagged.send_posted - 1 : task->tagged.send_posted;

while (task->tagged.send_posted < tsize - 1) {
step = task->tagged.send_posted;
while (step < tsize - 1) {
sblock = task->allgather_ring.get_send_block(&task->subset, trank,
tsize, step);
rblock = task->allgather_ring.get_recv_block(&task->subset, trank,
Expand All @@ -67,6 +69,8 @@ void ucc_tl_ucp_allgather_ring_progress(ucc_coll_task_t *coll_task)
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
return;
}
step = use_loopback ? task->tagged.send_posted - 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this line check for inplace as well? If use_loopback is true and inplace is false, will step be incorrect?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar for neighbor exchange

: task->tagged.send_posted;
}
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task));
task->super.status = UCC_OK;
Expand All @@ -86,6 +90,7 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task)
ucc_datatype_t dt = TASK_ARGS(task).dst.info.datatype;
ucc_rank_t trank = task->subset.myrank;
ucc_rank_t tsize = (ucc_rank_t)task->subset.map.ep_num;
ucc_rank_t rank = ucc_ep_map_eval(task->subset.map, trank);
size_t data_size = (count / tsize) * ucc_dt_size(dt);
ucc_status_t status;
ucc_rank_t block;
Expand All @@ -96,13 +101,12 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task)
if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
block = task->allgather_ring.get_send_block(&task->subset, trank, tsize,
0);
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * block),
sbuf, data_size, rmem, smem);
status = allgather_copy(PTR_OFFSET(rbuf, data_size * block), sbuf,
data_size, rmem, smem, rank, team, task);
if (ucc_unlikely(UCC_OK != status)) {
return status;
}
}

return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super);
}

Expand Down
4 changes: 2 additions & 2 deletions src/components/tl/ucp/allgather/allgather_sparbit.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ ucc_status_t ucc_tl_ucp_allgather_sparbit_start(ucc_coll_task_t *coll_task)
task->allgather_sparbit.data_expected = 1;

if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
data_size, rmem, smem);
status = allgather_copy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
data_size, rmem, smem, trank, team, task);
if (ucc_unlikely(UCC_OK != status)) {
return status;
}
Expand Down
14 changes: 9 additions & 5 deletions src/components/tl/ucp/tl_ucp.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = {
ucc_offsetof(ucc_tl_ucp_lib_config_t, alltoallv_pairwise_num_posts),
UCC_CONFIG_TYPE_ULUNITS},

/* TODO: add radix to config once it's fully supported by the algorithm
/* TODO: add radix to config once it's fully supported by the algorithm
{"ALLTOALLV_HYBRID_RADIX", "2",
"Radix of the Hybrid Alltoallv algorithm",
ucc_offsetof(ucc_tl_ucp_lib_config_t, alltoallv_hybrid_radix),
Expand Down Expand Up @@ -140,6 +140,12 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = {
ucc_offsetof(ucc_tl_ucp_lib_config_t, allgather_kn_radix),
UCC_CONFIG_TYPE_UINT},

{"ALLGATHER_USE_LOOPBACK", "0",
"If set to 1 performs network loopback for self copy, otherwise uses mc "
"cuda copy",
Comment on lines +144 to +145
Copy link
Collaborator

@samnordmann samnordmann Feb 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"If set to 1 performs network loopback for self copy, otherwise uses mc "
"cuda copy",
"If set to 1 performs network loopback for self copy, otherwise uses mc",

it is not necessarily cuda

ucc_offsetof(ucc_tl_ucp_lib_config_t, allgather_use_loopback),
UCC_CONFIG_TYPE_BOOL},

{"BCAST_KN_RADIX", "4", "Radix of the recursive-knomial bcast algorithm",
ucc_offsetof(ucc_tl_ucp_lib_config_t, bcast_kn_radix),
UCC_CONFIG_TYPE_UINT},
Expand Down Expand Up @@ -196,10 +202,8 @@ ucc_config_field_t ucc_tl_ucp_lib_config_table[] = {
ucc_offsetof(ucc_tl_ucp_lib_config_t, reduce_scatterv_ring_bidirectional),
UCC_CONFIG_TYPE_BOOL},

{"USE_TOPO", "try",
"Allow usage of tl ucp topo",
ucc_offsetof(ucc_tl_ucp_lib_config_t, use_topo),
UCC_CONFIG_TYPE_TERNARY},
{"USE_TOPO", "try", "Allow usage of tl ucp topo",
ucc_offsetof(ucc_tl_ucp_lib_config_t, use_topo), UCC_CONFIG_TYPE_TERNARY},

{"RANKS_REORDERING", "y",
"Use topology information in TL UCP to reorder ranks. Requires topo info",
Expand Down
1 change: 1 addition & 0 deletions src/components/tl/ucp/tl_ucp.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ typedef struct ucc_tl_ucp_lib_config {
ucc_mrange_uint_t allreduce_sra_kn_radix;
uint32_t reduce_scatter_kn_radix;
uint32_t allgather_kn_radix;
int allgather_use_loopback;
uint32_t bcast_kn_radix;
ucc_mrange_uint_t bcast_sag_kn_radix;
uint32_t reduce_kn_radix;
Expand Down
Loading
Loading