diff --git a/src/components/tl/cuda/tl_cuda.c b/src/components/tl/cuda/tl_cuda.c index 5bfb8d0da5..2e5ade2077 100644 --- a/src/components/tl/cuda/tl_cuda.c +++ b/src/components/tl/cuda/tl_cuda.c @@ -65,13 +65,14 @@ ucc_status_t ucc_tl_cuda_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); ucc_status_t ucc_tl_cuda_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_cuda_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_cuda_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_cuda_context_t, ucc_base_context_t, const ucc_base_context_params_t *, diff --git a/src/components/tl/cuda/tl_cuda_context.c b/src/components/tl/cuda/tl_cuda_context.c index 33ccf62f99..ceb38c211c 100644 --- a/src/components/tl/cuda/tl_cuda_context.c +++ b/src/components/tl/cuda/tl_cuda_context.c @@ -77,19 +77,19 @@ UCC_CLASS_INIT_FUNC(ucc_tl_cuda_context_t, } ucc_status_t ucc_tl_cuda_mem_map(const ucc_base_context_t *context, - void *address, - size_t len, - void *memh) + void *address, size_t len, void *memh) { return UCC_ERR_NOT_IMPLEMENTED; } -ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, void *memh) +ucc_status_t ucc_tl_cuda_mem_unmap(const ucc_base_context_t *context, + void *memh) { return UCC_ERR_NOT_IMPLEMENTED; } -ucc_status_t ucc_tl_cuda_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_cuda_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_IMPLEMENTED; } diff --git a/src/components/tl/mlx5/tl_mlx5.c b/src/components/tl/mlx5/tl_mlx5.c index f48e6fc3eb..084d8855bc 100644 --- a/src/components/tl/mlx5/tl_mlx5.c +++ b/src/components/tl/mlx5/tl_mlx5.c @@ -12,13 +12,14 @@ ucc_status_t ucc_tl_mlx5_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t * base_attr); ucc_status_t ucc_tl_mlx5_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_mlx5_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_mlx5_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); ucc_status_t ucc_tl_mlx5_get_lib_properties(ucc_base_lib_properties_t *prop); diff --git a/src/components/tl/mlx5/tl_mlx5_context.c b/src/components/tl/mlx5/tl_mlx5_context.c index 71fdd36acd..cc6f14010c 100644 --- a/src/components/tl/mlx5/tl_mlx5_context.c +++ b/src/components/tl/mlx5/tl_mlx5_context.c @@ -306,19 +306,20 @@ ucc_status_t ucc_tl_mlx5_context_create_epilog(ucc_base_context_t *context) } ucc_status_t ucc_tl_mlx5_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h) + void *address, size_t len, void *memh, + void *tl_h) { return UCC_ERR_NOT_IMPLEMENTED; } -ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_mlx5_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { return UCC_ERR_NOT_IMPLEMENTED; } -ucc_status_t ucc_tl_mlx5_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_mlx5_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_IMPLEMENTED; } diff --git a/src/components/tl/nccl/tl_nccl.c b/src/components/tl/nccl/tl_nccl.c index 4c82db7574..2016a7a70c 100644 --- a/src/components/tl/nccl/tl_nccl.c +++ b/src/components/tl/nccl/tl_nccl.c @@ -17,13 +17,14 @@ ucc_status_t ucc_tl_nccl_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); ucc_status_t ucc_tl_nccl_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_nccl_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_nccl_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); static ucc_config_field_t ucc_tl_nccl_lib_config_table[] = { {"", "", NULL, ucc_offsetof(ucc_tl_nccl_lib_config_t, super), diff --git a/src/components/tl/nccl/tl_nccl_context.c b/src/components/tl/nccl/tl_nccl_context.c index 8a3c03c048..f5c492ffc8 100644 --- a/src/components/tl/nccl/tl_nccl_context.c +++ b/src/components/tl/nccl/tl_nccl_context.c @@ -228,19 +228,20 @@ ucc_tl_nccl_get_context_attr(const ucc_base_context_t *context, /* NOLINT */ } ucc_status_t ucc_tl_nccl_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h) + void *address, size_t len, void *memh, + void *tl_h) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_nccl_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_nccl_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_nccl_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_SUPPORTED; } diff --git a/src/components/tl/rccl/tl_rccl.c b/src/components/tl/rccl/tl_rccl.c index e12ab28251..857732148c 100644 --- a/src/components/tl/rccl/tl_rccl.c +++ b/src/components/tl/rccl/tl_rccl.c @@ -18,13 +18,14 @@ ucc_status_t ucc_tl_rccl_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); ucc_status_t ucc_tl_rccl_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_rccl_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_rccl_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); static ucc_config_field_t ucc_tl_rccl_lib_config_table[] = { {"", "", NULL, ucc_offsetof(ucc_tl_rccl_lib_config_t, super), diff --git a/src/components/tl/rccl/tl_rccl_context.c b/src/components/tl/rccl/tl_rccl_context.c index 76e1903749..d7123a1ab2 100644 --- a/src/components/tl/rccl/tl_rccl_context.c +++ b/src/components/tl/rccl/tl_rccl_context.c @@ -125,19 +125,20 @@ ucc_tl_rccl_get_context_attr(const ucc_base_context_t *context, /* NOLINT */ } ucc_status_t ucc_tl_rccl_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h) + void *address, size_t len, void *memh, + void *tl_h) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_rccl_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_rccl_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_rccl_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_SUPPORTED; } diff --git a/src/components/tl/self/tl_self.c b/src/components/tl/self/tl_self.c index 334c0409ea..ca4e6be6f3 100644 --- a/src/components/tl/self/tl_self.c +++ b/src/components/tl/self/tl_self.c @@ -17,13 +17,14 @@ ucc_status_t ucc_tl_self_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); ucc_status_t ucc_tl_self_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_self_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_self_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); ucc_status_t ucc_tl_self_get_lib_properties(ucc_base_lib_properties_t *prop); diff --git a/src/components/tl/self/tl_self_context.c b/src/components/tl/self/tl_self_context.c index 53ab1299dc..4a234c802f 100644 --- a/src/components/tl/self/tl_self_context.c +++ b/src/components/tl/self/tl_self_context.c @@ -49,20 +49,21 @@ ucc_tl_self_get_context_attr(const ucc_base_context_t *context, /* NOLINT */ return UCC_OK; } -ucc_status_t ucc_tl_self_mem_map(const ucc_base_context_t *context, - int type, void *address, - size_t len, - void *memh, void *tl_h) +ucc_status_t ucc_tl_self_mem_map(const ucc_base_context_t *context, int type, + void *address, size_t len, void *memh, + void *tl_h) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_self_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_self_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_self_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_SUPPORTED; } diff --git a/src/components/tl/sharp/tl_sharp.c b/src/components/tl/sharp/tl_sharp.c index 705f1aa3dd..cea76856f2 100644 --- a/src/components/tl/sharp/tl_sharp.c +++ b/src/components/tl/sharp/tl_sharp.c @@ -16,13 +16,14 @@ ucc_status_t ucc_tl_sharp_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); ucc_status_t ucc_tl_sharp_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h); + void *address, size_t len, void *memh, + void *tl_h); -ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); -ucc_status_t ucc_tl_sharp_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_sharp_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer); static ucc_config_field_t ucc_tl_sharp_lib_config_table[] = { {"", "", NULL, ucc_offsetof(ucc_tl_sharp_lib_config_t, super), diff --git a/src/components/tl/sharp/tl_sharp_context.c b/src/components/tl/sharp/tl_sharp_context.c index 5979fa5813..94bdf8bdf9 100644 --- a/src/components/tl/sharp/tl_sharp_context.c +++ b/src/components/tl/sharp/tl_sharp_context.c @@ -513,19 +513,20 @@ ucc_status_t ucc_tl_sharp_get_context_attr(const ucc_base_context_t *context, /* } ucc_status_t ucc_tl_sharp_mem_map(const ucc_base_context_t *context, int type, - void *address, - size_t len, - void *memh, void *tl_h) + void *address, size_t len, void *memh, + void *tl_h) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_sharp_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { return UCC_ERR_NOT_SUPPORTED; } -ucc_status_t ucc_tl_sharp_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_sharp_memh_pack(const ucc_base_context_t *context, + void *memh, void **pack_buffer) { return UCC_ERR_NOT_SUPPORTED; } diff --git a/src/components/tl/ucp/alltoall/alltoall_onesided.c b/src/components/tl/ucp/alltoall/alltoall_onesided.c index 649f7c010e..3a6666908b 100644 --- a/src/components/tl/ucp/alltoall/alltoall_onesided.c +++ b/src/components/tl/ucp/alltoall/alltoall_onesided.c @@ -15,17 +15,17 @@ void ucc_tl_ucp_alltoall_onesided_progress(ucc_coll_task_t *ctask); ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask) { - ucc_tl_ucp_task_t *task = ucc_derived_of(ctask, ucc_tl_ucp_task_t); - ucc_tl_ucp_team_t *team = TASK_TEAM(task); - ptrdiff_t src = (ptrdiff_t)TASK_ARGS(task).src.info.buffer; - ptrdiff_t dest = (ptrdiff_t)TASK_ARGS(task).dst.info.buffer; - size_t nelems = TASK_ARGS(task).src.info.count; - ucc_rank_t grank = UCC_TL_TEAM_RANK(team); - ucc_rank_t gsize = UCC_TL_TEAM_SIZE(team); - ucc_rank_t start = (grank + 1) % gsize; - long * pSync = TASK_ARGS(task).global_work_buffer; - ucc_mem_map_mem_h *src_memh = TASK_ARGS(task).src_memh.global_memh; - ucc_mem_map_mem_h *dst_memh = TASK_ARGS(task).dst_memh.global_memh; + ucc_tl_ucp_task_t *task = ucc_derived_of(ctask, ucc_tl_ucp_task_t); + ucc_tl_ucp_team_t *team = TASK_TEAM(task); + ptrdiff_t src = (ptrdiff_t)TASK_ARGS(task).src.info.buffer; + ptrdiff_t dest = (ptrdiff_t)TASK_ARGS(task).dst.info.buffer; + size_t nelems = TASK_ARGS(task).src.info.count; + ucc_rank_t grank = UCC_TL_TEAM_RANK(team); + ucc_rank_t gsize = UCC_TL_TEAM_SIZE(team); + ucc_rank_t start = (grank + 1) % gsize; + long *pSync = TASK_ARGS(task).global_work_buffer; + ucc_mem_map_mem_h *src_memh = TASK_ARGS(task).src_memh.global_memh; + ucc_mem_map_mem_h *dst_memh = TASK_ARGS(task).dst_memh.global_memh; ucc_rank_t peer; ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); @@ -33,17 +33,22 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask) nelems = (nelems / gsize) * ucc_dt_size(TASK_ARGS(task).src.info.datatype); dest = dest + grank * nelems; - UCPCHECK_GOTO(ucc_tl_ucp_put_nb((void *)(src + start * nelems), - (void *)dest, nelems, start, src_memh[start], dst_memh[start], team, task), - task, out); - UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, start, src_memh[start], dst_memh[start], team), task, out); + UCPCHECK_GOTO( + ucc_tl_ucp_put_nb((void *)(src + start * nelems), (void *)dest, nelems, + start, src_memh[start], dst_memh[start], team, task), + task, out); + UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, start, src_memh[start], + dst_memh[start], team), + task, out); for (peer = (start + 1) % gsize; peer != start; peer = (peer + 1) % gsize) { - UCPCHECK_GOTO(ucc_tl_ucp_put_nb((void *)(src + peer * nelems), - (void *)dest, nelems, peer, src_memh[peer], dst_memh[peer], team, task), - task, out); - UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, peer, src_memh[peer], dst_memh[peer], team), task, - out); + UCPCHECK_GOTO(ucc_tl_ucp_put_nb( + (void *)(src + peer * nelems), (void *)dest, nelems, + peer, src_memh[peer], dst_memh[peer], team, task), + task, out); + UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, peer, src_memh[peer], + dst_memh[peer], team), + task, out); } return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); diff --git a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c index decfd470a5..ffbec5ee9a 100644 --- a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c +++ b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c @@ -24,8 +24,8 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_start(ucc_coll_task_t *ctask) ucc_aint_t *d_disp = TASK_ARGS(task).dst.info_v.displacements; size_t sdt_size = ucc_dt_size(TASK_ARGS(task).src.info_v.datatype); size_t rdt_size = ucc_dt_size(TASK_ARGS(task).dst.info_v.datatype); - ucc_mem_map_mem_h *src_memh = TASK_ARGS(task).src_memh.global_memh; - ucc_mem_map_mem_h *dst_memh = TASK_ARGS(task).dst_memh.global_memh; + ucc_mem_map_mem_h *src_memh = TASK_ARGS(task).src_memh.global_memh; + ucc_mem_map_mem_h *dst_memh = TASK_ARGS(task).dst_memh.global_memh; ucc_rank_t peer; size_t sd_disp, dd_disp, data_size; @@ -48,9 +48,11 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_start(ucc_coll_task_t *ctask) UCPCHECK_GOTO(ucc_tl_ucp_put_nb(PTR_OFFSET(src, sd_disp), PTR_OFFSET(dest, dd_disp), - data_size, peer, src_memh[peer], dst_memh[peer], team, task), + data_size, peer, src_memh[peer], + dst_memh[peer], team, task), task, out); - UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, peer, src_memh[peer], dst_memh[peer], team), task, out); + UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, peer, src_memh[peer], + dst_memh[peer], team), task, out); } return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); out: diff --git a/src/components/tl/ucp/tl_ucp.c b/src/components/tl/ucp/tl_ucp.c index 2d508da50b..b10d7e56cf 100644 --- a/src/components/tl/ucp/tl_ucp.c +++ b/src/components/tl/ucp/tl_ucp.c @@ -32,16 +32,15 @@ ucc_status_t ucc_tl_ucp_get_lib_properties(ucc_base_lib_properties_t *prop); ucc_status_t ucc_tl_ucp_get_context_attr(const ucc_base_context_t *context, ucc_base_ctx_attr_t *base_attr); -ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, - int type, - void *address, - size_t len, - void *memh, +ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, int type, + void *address, size_t len, void *memh, void *tl_h); -ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer); +ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, void *memh, + void **pack_buffer); -ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, void *memh); +ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, + void *memh); ucc_config_field_t ucc_tl_ucp_lib_config_table[] = { {"", "", NULL, ucc_offsetof(ucc_tl_ucp_lib_config_t, super), diff --git a/src/components/tl/ucp/tl_ucp.h b/src/components/tl/ucp/tl_ucp.h index 92f8b6503b..bbc3bfd9bb 100644 --- a/src/components/tl/ucp/tl_ucp.h +++ b/src/components/tl/ucp/tl_ucp.h @@ -109,9 +109,9 @@ typedef struct ucc_tl_ucp_remote_info { typedef struct ucc_tl_ucp_memh_data { ucc_tl_ucp_remote_info_t rinfo; - void *packed_memh; - size_t packed_memh_len; - ucp_rkey_h rkey; + void *packed_memh; + size_t packed_memh_len; + ucp_rkey_h rkey; } ucc_tl_ucp_memh_data_t; typedef struct ucc_tl_ucp_worker { diff --git a/src/components/tl/ucp/tl_ucp_context.c b/src/components/tl/ucp/tl_ucp_context.c index bf2784d7b0..95529f1058 100644 --- a/src/components/tl/ucp/tl_ucp_context.c +++ b/src/components/tl/ucp/tl_ucp_context.c @@ -555,113 +555,127 @@ static void ucc_tl_ucp_ctx_remote_pack_data(ucc_tl_ucp_context_t *ctx, } } -ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, - int type, - void *address, - size_t len, - void *memh, - void *tl_h) +ucc_status_t ucc_tl_ucp_mem_map_memhbuf(ucc_tl_ucp_context_t *ctx, + void *pack_buffer, ucp_mem_h *mh) { - ucc_tl_ucp_context_t *ctx = ucc_derived_of(context, ucc_tl_ucp_context_t); - ucc_status_t ucc_status = UCC_OK; - ucc_mem_map_tl_t * p_memh = (ucc_mem_map_tl_t *)tl_h; - ucc_tl_ucp_memh_data_t *m_data = (ucc_tl_ucp_memh_data_t *)p_memh->tl_data; ucp_mem_map_params_t mmap_params; - ucp_mem_h mh = NULL; ucs_status_t status; - ucp_memh_pack_params_t pack_params; - // basically an import here - if (type == UCC_MEM_MAP_TYPE_GLOBAL) { - // m_data is lost in the exchange, make a new one + *mh = NULL; + /* unpack here */ + size_t *key_size = (size_t *)pack_buffer; + void *packed_memh = PTR_OFFSET(pack_buffer, sizeof(size_t) * 2 + *key_size); + + mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER; + mmap_params.exported_memh_buffer = packed_memh; + + status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, mh); + if (UCS_OK != status) { + if (status != UCS_ERR_UNREACHABLE) { + tl_error(ctx->super.super.lib, + "ucp_mem_map failed with error code: %s", + ucs_status_string(status)); + return ucs_status_to_ucc_status(status); + } else { + tl_debug(ctx->super.super.lib, + "ucp_mem_map could not map exported memory handle"); + } + } + return UCC_OK; +} + +ucc_status_t ucc_tl_ucp_mem_map(const ucc_base_context_t *context, int type, + void *address, size_t len, void *memh, + void *tl_h) +{ + ucc_tl_ucp_context_t *ctx = ucc_derived_of(context, ucc_tl_ucp_context_t); + ucc_status_t ucc_status = UCC_OK; + ucc_mem_map_tl_t *p_memh = (ucc_mem_map_tl_t *)tl_h; + ucc_tl_ucp_memh_data_t *m_data = (ucc_tl_ucp_memh_data_t *)p_memh->tl_data; + ucp_mem_h mh = NULL; + ucc_mem_map_memh_t *l_memh = (ucc_mem_map_memh_t *)memh; + size_t offset = 0; + ucp_mem_map_params_t mmap_params; + ucs_status_t status; + ucp_memh_pack_params_t pack_params; + + if (type == UCC_MEM_MAP_TYPE_GLOBAL || !m_data) { + /* either we are importing or m_data is null */ m_data = ucc_calloc(1, sizeof(ucc_tl_ucp_memh_data_t), "tl data"); if (!m_data) { tl_error(ctx->super.super.lib, "failed to allocate tl data"); return UCC_ERR_NO_MEMORY; } p_memh->tl_data = m_data; -#if defined(__aarch64__) - ucc_mem_map_memh_t * l_memh = (ucc_mem_map_memh_t *)memh; - /* unpack here */ - size_t *key_size = (size_t *)l_memh->pack_buffer; - void *packed_memh = PTR_OFFSET(l_memh->pack_buffer, sizeof(size_t) * 2 + *key_size); - mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER; - mmap_params.exported_memh_buffer = packed_memh; - + } + if (type == UCC_MEM_MAP_TYPE_LOCAL) { + mmap_params.field_mask = + UCP_MEM_MAP_PARAM_FIELD_ADDRESS | UCP_MEM_MAP_PARAM_FIELD_LENGTH; + mmap_params.address = address; + mmap_params.length = len; + status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); if (UCS_OK != status) { - if (status != UCS_ERR_UNREACHABLE) { - tl_error(ctx->super.super.lib, - "ucp_mem_map failed with error code: %s", ucs_status_string(status)); - ucc_status = ucs_status_to_ucc_status(status); - return ucc_status; - } else { - tl_debug(ctx->super.super.lib, - "ucp_mem_map could not map exported memory handle"); - ucc_status = UCC_OK; // this is still OK - } - } else { - m_data->rinfo.mem_h = mh; - // the rest of the data is garbage. fix it - m_data->rinfo.va_base = address; - m_data->rinfo.len = len; + tl_error(ctx->super.super.lib, + "ucp_mem_map failed with error code: %d", status); + ucc_status = ucs_status_to_ucc_status(status); } -#endif } else { - if (!m_data) { - m_data = ucc_calloc(1, sizeof(ucc_tl_ucp_memh_data_t), "tl data"); - if (!m_data) { - tl_error(ctx->super.super.lib, "failed to allocate TL/UCP specific data"); - return UCC_ERR_NO_MEMORY; + for (int i = 0; i < l_memh->num_tls; i++) { + size_t *p = (size_t *)PTR_OFFSET(l_memh->pack_buffer, offset); + + if (tl_h == (void *)&l_memh->tl_h[i]) { + break; } - p_memh->tl_data = m_data; - } - // local, we need to map it here - if (m_data->rinfo.mem_h == NULL) { - mmap_params.field_mask = - UCP_MEM_MAP_PARAM_FIELD_ADDRESS | UCP_MEM_MAP_PARAM_FIELD_LENGTH; - mmap_params.address = address; - mmap_params.length = len; - - status = ucp_mem_map(ctx->worker.ucp_context, &mmap_params, &mh); - if (UCS_OK != status) { - tl_error(ctx->super.super.lib, - "ucp_mem_map failed with error code: %d", status); - ucc_status = ucs_status_to_ucc_status(status); + /* this is not the index, skip this section of buffer if exists */ + if (p[0] == i) { + offset += p[1]; } } - m_data->rinfo.mem_h = mh; - m_data->rinfo.va_base = address; - m_data->rinfo.len = len; - // export memh - pack_params.field_mask = UCP_MEMH_PACK_PARAM_FIELD_FLAGS; - pack_params.flags = UCP_MEMH_PACK_FLAG_EXPORT; + ucc_status = ucc_tl_ucp_mem_map_memhbuf( + ctx, PTR_OFFSET(l_memh->pack_buffer, offset), &mh); + if (ucc_status != UCC_OK) { + tl_error(ctx->super.super.lib, "ucp_mem_map failed to map memh"); + return ucc_status; + } + } + m_data->rinfo.mem_h = mh; + m_data->rinfo.va_base = address; + m_data->rinfo.len = len; - status = ucp_memh_pack(mh, &pack_params, &m_data->packed_memh, &m_data->packed_memh_len); + if (type == UCC_MEM_MAP_TYPE_LOCAL) { + status = ucp_memh_pack(mh, &pack_params, &m_data->packed_memh, + &m_data->packed_memh_len); if (status != UCS_OK) { // we don't support memory pack, or it failed - tl_debug("ucp_memh_pack() returned error %s", ucs_status_string(status)); - m_data->packed_memh = 0; + tl_debug(ctx->super.super.lib, "ucp_memh_pack() returned error %s", + ucs_status_string(status)); + m_data->packed_memh = 0; m_data->packed_memh_len = 0; } // pack rkey - status = ucp_rkey_pack(ctx->worker.ucp_context, mh, &m_data->rinfo.packed_key, &m_data->rinfo.packed_key_len); + status = ucp_rkey_pack(ctx->worker.ucp_context, mh, + &m_data->rinfo.packed_key, + &m_data->rinfo.packed_key_len); if (status != UCS_OK) { - tl_error("unable to pack rkey with error %s", ucs_status_string(status)); + tl_error(ctx->super.super.lib, "unable to pack rkey with error %s", + ucs_status_string(status)); } - p_memh->packed_size = m_data->packed_memh_len + m_data->rinfo.packed_key_len; + p_memh->packed_size = + m_data->packed_memh_len + m_data->rinfo.packed_key_len; } return ucc_status; } -ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, void *memh) +ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, + void *memh) { - ucc_tl_ucp_context_t *ctx = ucc_derived_of(context, ucc_tl_ucp_context_t); - ucc_mem_map_tl_t * p_memh = (ucc_mem_map_tl_t *) memh; + ucc_tl_ucp_context_t *ctx = ucc_derived_of(context, ucc_tl_ucp_context_t); + ucc_mem_map_tl_t *p_memh = (ucc_mem_map_tl_t *)memh; ucc_tl_ucp_memh_data_t *data; - ucs_status_t status; + ucs_status_t status; if (!p_memh) { return UCC_OK; @@ -672,7 +686,8 @@ ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, v if (type == UCC_MEM_MAP_TYPE_LOCAL) { status = ucp_mem_unmap(ctx->worker.ucp_context, data->rinfo.mem_h); if (status != UCS_OK) { - tl_error(ctx->super.super.lib, "ucp_mem_unmap failed with error code %d", status); + tl_error(ctx->super.super.lib, + "ucp_mem_unmap failed with error code %d", status); return ucs_status_to_ucc_status(status); } } else if (type == UCC_MEM_MAP_TYPE_GLOBAL) { @@ -693,13 +708,15 @@ ucc_status_t ucc_tl_ucp_mem_unmap(const ucc_base_context_t *context, int type, v return UCC_OK; } -ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, void *memh, void **pack_buffer) +ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, void *memh, + void **pack_buffer) { - ucc_mem_map_tl_t * p_memh = (ucc_mem_map_tl_t *) memh; - ucc_tl_ucp_memh_data_t *data = p_memh->tl_data; - void *packed_buffer; - size_t *key_size; - size_t *memh_size; + ucc_tl_ucp_context_t *ctx = ucc_derived_of(context, ucc_tl_ucp_context_t); + ucc_mem_map_tl_t *p_memh = (ucc_mem_map_tl_t *)memh; + ucc_tl_ucp_memh_data_t *data = p_memh->tl_data; + void *packed_buffer; + size_t *key_size; + size_t *memh_size; if (!data) { return UCC_OK; @@ -709,20 +726,27 @@ ucc_status_t ucc_tl_ucp_memh_pack(const ucc_base_context_t *context, void *memh, * * packed_key_size | packed_memh_size | packed_key | packed_memh */ - packed_buffer = ucc_malloc(sizeof(size_t) * 2 + data->packed_memh_len + data->rinfo.packed_key_len, + packed_buffer = ucc_malloc(sizeof(size_t) * 2 + data->packed_memh_len + + data->rinfo.packed_key_len, "packed buffer"); if (!packed_buffer) { - ucc_error("failed to allocate a packed buffer of size %lu", data->packed_memh_len + data->rinfo.packed_key_len); + tl_error(ctx->super.super.lib, + "failed to allocate a packed buffer of size %lu", + data->packed_memh_len + data->rinfo.packed_key_len); return UCC_ERR_NO_MEMORY; } - key_size = packed_buffer; - *key_size = data->rinfo.packed_key_len; - memh_size = PTR_OFFSET(packed_buffer, sizeof(size_t)); + key_size = packed_buffer; + *key_size = data->rinfo.packed_key_len; + memh_size = PTR_OFFSET(packed_buffer, sizeof(size_t)); *memh_size = data->packed_memh_len; - memcpy(PTR_OFFSET(packed_buffer, sizeof(size_t) * 2), data->rinfo.packed_key, *key_size); - memcpy(PTR_OFFSET(packed_buffer, sizeof(size_t) * 2 + data->rinfo.packed_key_len), data->packed_memh, *memh_size); - - p_memh->packed_size = sizeof(size_t) * 2 + data->packed_memh_len + data->rinfo.packed_key_len; + memcpy(PTR_OFFSET(packed_buffer, sizeof(size_t) * 2), + data->rinfo.packed_key, *key_size); + memcpy(PTR_OFFSET(packed_buffer, + sizeof(size_t) * 2 + data->rinfo.packed_key_len), + data->packed_memh, *memh_size); + + p_memh->packed_size = + sizeof(size_t) * 2 + data->packed_memh_len + data->rinfo.packed_key_len; *pack_buffer = packed_buffer; return UCC_OK; } diff --git a/src/components/tl/ucp/tl_ucp_sendrecv.h b/src/components/tl/ucp/tl_ucp_sendrecv.h index fa02790abe..bcdb43cb3b 100644 --- a/src/components/tl/ucp/tl_ucp_sendrecv.h +++ b/src/components/tl/ucp/tl_ucp_sendrecv.h @@ -273,7 +273,6 @@ static inline ucc_status_t ucc_tl_ucp_check_memh(ucp_ep_h *ep, void *va, uint64_ } } *rkey = tl_data->rkey; - /* FIXME: packed memh? */ return UCC_OK; } return UCC_ERR_NOT_FOUND; diff --git a/src/core/ucc_context.c b/src/core/ucc_context.c index 5586a85fbd..d63c144d60 100644 --- a/src/core/ucc_context.c +++ b/src/core/ucc_context.c @@ -1132,16 +1132,16 @@ ucc_status_t ucc_context_get_attr(ucc_context_t *context, return status; } -ucc_status_t ucc_mem_map_import(ucc_context_h context, +ucc_status_t ucc_mem_map_import(ucc_context_h context, ucc_mem_map_params_t *params, size_t *memh_size, ucc_mem_map_mem_h *memh) { - ucc_context_t *ctx = (ucc_context_t *)context; - ucc_status_t status = UCC_OK; - ucc_config_names_array_t *tls = &ctx->all_tls; - int i; - ucc_mem_map_memh_t *local_memh; - ucc_tl_lib_t *tl_lib; + ucc_context_t *ctx = (ucc_context_t *)context; + ucc_status_t status = UCC_OK; + ucc_config_names_array_t *tls = &ctx->all_tls; + int i; + ucc_mem_map_memh_t *local_memh; + ucc_tl_lib_t *tl_lib; if (!memh) { ucc_error("cannot import NULL memory handle"); @@ -1163,8 +1163,9 @@ ucc_status_t ucc_mem_map_import(ucc_context_h context, tl_lib = ucc_derived_of(ctx->tl_ctx[i]->super.lib, ucc_tl_lib_t); /* FIXME: i don't think this will work properly for more than 1 TL */ status = tl_lib->iface->context.mem_map( - (const ucc_base_context_t *)ctx->tl_ctx[i], UCC_MEM_MAP_TYPE_GLOBAL, params->segments[0].address, params->segments[0].len, - local_memh, &local_memh->tl_h[i]); + (const ucc_base_context_t *)ctx->tl_ctx[i], UCC_MEM_MAP_TYPE_GLOBAL, + params->segments[0].address, params->segments[0].len, local_memh, + &local_memh->tl_h[i]); if (status < UCC_ERR_NOT_IMPLEMENTED) { ucc_error("failed to import mem map memh %d", status); return status; @@ -1174,27 +1175,28 @@ ucc_status_t ucc_mem_map_import(ucc_context_h context, local_memh->type = UCC_MEM_MAP_TYPE_GLOBAL; /* fix context as it will be incorrect on a different system */ local_memh->context = ctx; + *memh_size = 0; return status; } -ucc_status_t ucc_mem_map_export(ucc_context_h context, +ucc_status_t ucc_mem_map_export(ucc_context_h context, ucc_mem_map_params_t *params, size_t *memh_size, ucc_mem_map_mem_h *memh) { - ucc_context_t *ctx = (ucc_context_t *)context; - size_t total_pack_size = 0; - ucc_mem_map_memh_t *local_memh; - ucc_mem_map_memh_t *exported_memh; - void **packed_buffers; - ucc_status_t status; - ucc_tl_lib_t *tl_lib; - size_t offset; - int i; + ucc_context_t *ctx = (ucc_context_t *)context; + size_t total_pack_size = 0; + ucc_mem_map_memh_t *local_memh; + ucc_mem_map_memh_t *exported_memh; + void **packed_buffers; + ucc_status_t status; + ucc_tl_lib_t *tl_lib; + size_t offset; + int i; ucc_config_names_array_t *tls = &ctx->all_tls; - local_memh = (ucc_mem_map_memh_t *)ucc_calloc( - 1, sizeof(ucc_mem_map_memh_t), "local memh"); + local_memh = (ucc_mem_map_memh_t *)ucc_calloc(1, sizeof(ucc_mem_map_memh_t), + "local memh"); if (!local_memh) { ucc_error("failed to allocate a local memory handle"); return UCC_ERR_NO_MEMORY; @@ -1210,8 +1212,9 @@ ucc_status_t ucc_mem_map_export(ucc_context_h context, tl_lib = ucc_derived_of(ctx->tl_ctx[i]->super.lib, ucc_tl_lib_t); /* always treat as a local mem handle */ status = tl_lib->iface->context.mem_map( - (const ucc_base_context_t *)ctx->tl_ctx[i], UCC_MEM_MAP_TYPE_LOCAL, params->segments[0].address, params->segments[0].len, - local_memh, &local_memh->tl_h[i]); + (const ucc_base_context_t *)ctx->tl_ctx[i], UCC_MEM_MAP_TYPE_LOCAL, + params->segments[0].address, params->segments[0].len, local_memh, + &local_memh->tl_h[i]); if (status != UCC_OK) { if (status < UCC_ERR_NOT_IMPLEMENTED) { ucc_error("failed to map memory"); @@ -1231,7 +1234,8 @@ ucc_status_t ucc_mem_map_export(ucc_context_h context, tl_lib = ucc_derived_of(ctx->tl_ctx[i]->super.lib, ucc_tl_lib_t); /* tl should set packed_size, allocate buffer, pack memh */ status = tl_lib->iface->context.memh_pack( - (const ucc_base_context_t *)ctx->tl_ctx[i], &local_memh->tl_h[i], &packed_buffers[i]); + (const ucc_base_context_t *)ctx->tl_ctx[i], + &local_memh->tl_h[i], &packed_buffers[i]); if (status != UCC_OK) { if (status < UCC_ERR_NOT_IMPLEMENTED) { ucc_error("failed to map memory"); @@ -1244,7 +1248,10 @@ ucc_status_t ucc_mem_map_export(ucc_context_h context, /* allocate exported memh, copy items over */ exported_memh = (ucc_mem_map_memh_t *)ucc_calloc( - 1, sizeof(ucc_mem_map_memh_t) + total_pack_size + 2 * sizeof(size_t) * ctx->n_tl_ctx, "exported memh"); + 1, + sizeof(ucc_mem_map_memh_t) + total_pack_size + + 2 * sizeof(size_t) * ctx->n_tl_ctx, + "exported memh"); if (!exported_memh) { ucc_error("failed to allocate handle for exported buffers"); return UCC_ERR_NO_MEMORY; @@ -1257,8 +1264,8 @@ ucc_status_t ucc_mem_map_export(ucc_context_h context, if (local_memh->tl_h[i].packed_size == 0) { continue; } - memcpy(PTR_OFFSET(exported_memh->pack_buffer, offset), - &tl_index, sizeof(size_t)); + memcpy(PTR_OFFSET(exported_memh->pack_buffer, offset), &tl_index, + sizeof(size_t)); offset += sizeof(size_t); memcpy(PTR_OFFSET(exported_memh->pack_buffer, offset), &local_memh->tl_h[i].packed_size, sizeof(size_t)); @@ -1272,20 +1279,22 @@ ucc_status_t ucc_mem_map_export(ucc_context_h context, strncpy(exported_memh->tl_h[i].tl_name, tls->names[i], 8); } ucc_free(local_memh); - exported_memh->type = UCC_MEM_MAP_TYPE_LOCAL; - exported_memh->context = ctx; - exported_memh->address = params->segments[0].address; - exported_memh->len = params->segments[0].len; + exported_memh->type = UCC_MEM_MAP_TYPE_LOCAL; + exported_memh->context = ctx; + exported_memh->address = params->segments[0].address; + exported_memh->len = params->segments[0].len; exported_memh->my_ctx_rank = ctx->rank; - exported_memh->num_tls = ctx->n_tl_ctx; - *memh = exported_memh; - *memh_size = total_pack_size; + exported_memh->num_tls = ctx->n_tl_ctx; + *memh = exported_memh; + *memh_size = total_pack_size; return UCC_OK; failed_pack: failed_mem_map: for (int j = 0; j < i; j++) { tl_lib = ucc_derived_of(ctx->tl_ctx[i]->super.lib, ucc_tl_lib_t); - tl_lib->iface->context.mem_unmap((const ucc_base_context_t *)ctx, UCC_MEM_MAP_TYPE_LOCAL, &local_memh->tl_h[j]); + tl_lib->iface->context.mem_unmap((const ucc_base_context_t *)ctx, + UCC_MEM_MAP_TYPE_LOCAL, + &local_memh->tl_h[j]); } return status; } @@ -1307,7 +1316,6 @@ ucc_status_t ucc_mem_map(ucc_context_h context, ucc_mem_map_flags_t flags, // set map type to local return ucc_mem_map_export(context, params, memh_size, memh); } - return UCC_OK; } ucc_status_t ucc_mem_unmap(ucc_mem_map_mem_h *memh) @@ -1324,11 +1332,13 @@ ucc_status_t ucc_mem_unmap(ucc_mem_map_mem_h *memh) } /* it could be global or local type */ lmemh = (ucc_mem_map_memh_t *)*memh; - ctx = (ucc_context_t *)lmemh->context; + ctx = (ucc_context_t *)lmemh->context; for (i = 0; i < ctx->n_tl_ctx; i++) { tl_lib = ucc_derived_of(ctx->tl_ctx[i]->super.lib, ucc_tl_lib_t); - status = tl_lib->iface->context.mem_unmap((const ucc_base_context_t *)ctx->tl_ctx[i], lmemh->type, &lmemh->tl_h[i]); + status = tl_lib->iface->context.mem_unmap( + (const ucc_base_context_t *)ctx->tl_ctx[i], lmemh->type, + &lmemh->tl_h[i]); if (status < UCC_ERR_NOT_IMPLEMENTED) { ucc_error("we had an error"); return status; diff --git a/src/core/ucc_context.h b/src/core/ucc_context.h index f8e255257d..916c87478a 100644 --- a/src/core/ucc_context.h +++ b/src/core/ucc_context.h @@ -101,7 +101,7 @@ typedef enum { typedef struct ucc_mem_map_tl_t { size_t packed_size; - char tl_name[8]; // typically less than 4 letters + char tl_name[8]; void *tl_data; /* tl specific data */ } ucc_mem_map_tl_t; @@ -110,9 +110,7 @@ typedef struct ucc_mem_map_memh_t { ucc_context_h context; void *address; size_t len; - /* rank of exporting process */ ucc_rank_t my_ctx_rank; - /* handles for each tl */ ucc_mem_map_tl_t *tl_h; int num_tls; char pack_buffer[0];