Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions ompi/mca/coll/solo/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# Copyright (c) 2019 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Headers and C sources that make up the solo coll component. The same
# list feeds both the DSO and the static library targets below.
sources = \
coll_solo.h \
coll_solo_mpool.h \
coll_solo_barrier.c \
coll_solo_reduce.c \
coll_solo_bcast.c \
coll_solo_allreduce.c \
coll_solo_component.c \
coll_solo_module.c \
coll_solo_mpool.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

# Exactly one of the two variables below receives the library name,
# depending on the MCA_BUILD_ompi_coll_solo_DSO conditional.
component_noinst =
component_install =
if MCA_BUILD_ompi_coll_solo_DSO
component_install += mca_coll_solo.la
else
component_noinst += libmca_coll_solo.la
endif

# DSO build: mca_coll_solo.la is installed into $(ompilibdir).
# NOTE(review): mca_coll_solo_la_LIBADD is empty; confirm whether the DSO
# needs to be linked explicitly against the MPI library it calls into.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_solo_la_SOURCES = $(sources)
mca_coll_solo_la_LDFLAGS = -module -avoid-version
mca_coll_solo_la_LIBADD =

# Static build: libmca_coll_solo.la is built from the same sources but
# listed under noinst_LTLIBRARIES, i.e. it is not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_solo_la_SOURCES =$(sources)
libmca_coll_solo_la_LDFLAGS = -module -avoid-version
191 changes: 191 additions & 0 deletions ompi/mca/coll/solo/coll_solo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/**
* Copyright (c) 2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#ifndef MCA_COLL_SOLO_EXPORT_H
#define MCA_COLL_SOLO_EXPORT_H

#include "ompi_config.h"

#include "mpi.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/communicator/communicator.h"
#include "ompi/win/win.h"
#include "ompi/include/mpi.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "opal/util/info.h"
#include "ompi/op/op.h"
#include "opal/runtime/opal_progress.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_solo_mpool.h"

BEGIN_C_DECLS
/**
* Structure to hold the solo coll component. It starts with the base coll component and is
* followed by the solo-specific state: the current MCA parameter values and a pointer to the
* shared memory pool.
*/
typedef struct mca_coll_solo_component_t {
/* Base coll component */
mca_coll_base_component_2_0_0_t super;

/* MCA parameters */
/* Priority of the solo module */
int solo_priority;
/* The size of data_bufs in the static_win */
uint32_t static_block_size;
/*
* Size and number of the "small" blocks of the memory pool.
* NOTE(review): the unit of the size (presumably bytes) is not visible in this header -
* confirm against coll_solo_mpool.
*/
uint32_t mpool_small_block_size;
uint32_t mpool_small_block_num;
/*
* Size and number of the "large" blocks of the memory pool (same caveat on units as above).
*/
uint32_t mpool_large_block_size;
uint32_t mpool_large_block_num;

/* Shared memory pool */
mca_coll_solo_mpool_t *solo_mpool;
} mca_coll_solo_component_t;

/**
* Coll solo module: the base coll module followed by the solo-specific state (lazy
* initialization flag, the static shared memory window and the pointers into it, and the
* barrier tag).
*/
typedef struct mca_coll_solo_module_t {
/* Base module */
mca_coll_base_module_t super;

/* Whether this module has been lazily initialized or not yet */
bool enabled;

/**
* This window is created by ompi_win_allocate_shared such that each process contains a shared
* memory data buffer, and this data buffer is divided into two parts - ctrl_bufs and data_bufs.
*/
MPI_Win static_win;
/**
* The first 4 * opal_cache_line_size bytes in the shared memory data buffer in static_win, used
* to store control messages.
* NOTE(review): presumably one pointer per process of the communicator - confirm in
* mca_coll_solo_setup_static_win.
*/
char **ctrl_bufs;
/**
* The rest of the shared memory data buffer in static_win, which is intended to be used to
* transfer very small messages. Its size is set by static_block_size.
*/
char **data_bufs;

/* Identify which ctrl_buf is currently used in mac_coll_solo_barrier_intra. */
int barrier_tag;
} mca_coll_solo_module_t;
/* Class declaration for mca_coll_solo_module_t. */
OBJ_CLASS_DECLARATION(mca_coll_solo_module_t);

/**
* Global component instance. Only declared here; the definition lives in another
* translation unit.
*/
OMPI_MODULE_DECLSPEC extern mca_coll_solo_component_t mca_coll_solo_component;

/**
* coll module functions
*/
/*
* Component init query, taking the progress-thread and MPI-thread settings.
* NOTE(review): the meaning of the returned int (presumably an OMPI status code) is not
* visible in this header - confirm in coll_solo_module.c.
*/
int mca_coll_solo_init_query(bool enable_progress_threads, bool enable_mpi_threads);

/*
* Communicator query: returns a coll module for comm and reports a priority through
* *priority.
* NOTE(review): presumably returns NULL when the component declines the communicator -
* confirm in the implementation.
*/
mca_coll_base_module_t *mca_coll_solo_comm_query(struct ompi_communicator_t *comm, int *priority);

/* Lazily enable a module (since it involves expensive memory allocation, etc.) */
int mca_coll_solo_lazy_enable(mca_coll_base_module_t * module, struct ompi_communicator_t *comm);

/*
* Setup and initialize the static_win of a communicator.
* data_buf_size is the requested size of the data buffer part of the window.
* NOTE(review): returns void, so failures cannot be reported to the caller through the
* return value - confirm how errors are handled in the implementation.
*/
void mca_coll_solo_setup_static_win(mca_coll_solo_module_t *solo_module,
struct ompi_communicator_t *comm,
size_t data_buf_size);

/*
* MPI_Barrier algorithms.
* NOTE(review): the symbol is spelled with a "mac_" prefix while every other function in this
* header uses "mca_". Renaming it requires updating the definition and all callers together.
*/
int mac_coll_solo_barrier_intra(struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);

/*
* MPI_Bcast algorithms.
* All variants take the buffer, element count, datatype, root rank, communicator and coll
* module.
*/
/*
* NOTE(review): presumably the entry point that selects one of the algorithms below -
* confirm in coll_solo_bcast.c.
*/
int mca_coll_solo_bcast_intra(void *buff, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);

/* Linear, memcpy-based broadcast variant. */
int mca_coll_solo_bcast_linear_intra_memcpy(void *buff, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);

/*
* Pipelined, memcpy-based broadcast variant; seg_size selects the pipeline segment size.
* NOTE(review): the unit of seg_size (presumably bytes) is not visible here - confirm.
*/
int mca_coll_solo_bcast_pipeline_intra_memcpy(void *buff, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module,
size_t seg_size);

/*
* MPI_Reduce algorithms.
* Both variants take the send buffer, receive buffer, element count, datatype, reduction
* operation, root rank, communicator and coll module.
*/
/*
* NOTE(review): presumably the entry point that selects the algorithm below - confirm in
* coll_solo_reduce.c.
*/
int mca_coll_solo_reduce_intra(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);

/* Ring, memcpy-based reduce variant. */
int mca_coll_solo_reduce_ring_intra_memcpy(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t
*comm, mca_coll_base_module_t * module);


/*
* MPI_Allreduce algorithms.
* Both variants take the send buffer, receive buffer, element count, datatype, reduction
* operation, communicator and coll module (there is no root argument).
*/
/*
* NOTE(review): presumably the entry point that selects the algorithm below - confirm in
* coll_solo_allreduce.c.
*/
int mca_coll_solo_allreduce_intra(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);

/* Ring, memcpy-based allreduce variant. */
int mca_coll_solo_allreduce_ring_intra_memcpy(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t * module);


/**
 * Move count elements of dtype from a local buffer into a shared memory buffer.
 *
 * Predefined datatypes are transferred with a single memcpy of count * extent bytes. Any
 * other datatype is packed with MPI_Pack on MPI_COMM_SELF, with count * extent passed as the
 * size of the output buffer.
 *
 * @param local_buf   source buffer
 * @param shared_buf  destination buffer
 * @param dtype       datatype of the elements
 * @param count       number of elements
 * @param extent      extent of one element in bytes, as supplied by the caller
 *
 * NOTE(review): the MPI_Pack return code is discarded, and count * extent is narrowed to the
 * int outsize argument of MPI_Pack - confirm that callers keep the product within int range
 * and that the shared buffer is large enough for the packed representation.
 */
static inline void mca_coll_solo_pack_to_shared(void *local_buf, void *shared_buf, struct ompi_datatype_t *dtype, int count, ptrdiff_t extent) {
    const ptrdiff_t nbytes = count * extent;

    if (!ompi_datatype_is_predefined(dtype)) {
        int position = 0;
        MPI_Pack(local_buf, count, dtype, shared_buf, nbytes, &position, MPI_COMM_SELF);
        return;
    }

    memcpy(shared_buf, local_buf, nbytes);
}

/**
 * Move count elements of dtype from a shared memory buffer back into a local buffer.
 *
 * Predefined datatypes are transferred with a single memcpy of count * extent bytes. Any
 * other datatype is unpacked with MPI_Unpack on MPI_COMM_SELF, with count * extent passed as
 * the size of the input buffer.
 *
 * @param local_buf   destination buffer
 * @param shared_buf  source buffer
 * @param dtype       datatype of the elements
 * @param count       number of elements
 * @param extent      extent of one element in bytes, as supplied by the caller
 *
 * NOTE(review): the MPI_Unpack return code is discarded, and count * extent is narrowed to
 * the int insize argument of MPI_Unpack - confirm that callers keep the product within int
 * range.
 */
static inline void mca_coll_solo_unpack_from_shared(void *local_buf, void *shared_buf, struct ompi_datatype_t *dtype, int count, ptrdiff_t extent) {
    const ptrdiff_t nbytes = count * extent;

    if (!ompi_datatype_is_predefined(dtype)) {
        int position = 0;
        MPI_Unpack(shared_buf, nbytes, &position, local_buf, count, dtype, MPI_COMM_SELF);
        return;
    }

    memcpy(local_buf, shared_buf, nbytes);
}

/**
 * Copy count elements of dtype from source to target.
 *
 * Predefined datatypes are copied with a single memcpy of count * extent bytes; any other
 * datatype is copied through ompi_datatype_copy_content_same_ddt.
 *
 * @param source  buffer to read from
 * @param target  buffer to write to
 * @param dtype   datatype of the elements
 * @param count   number of elements
 * @param extent  extent of one element in bytes, as supplied by the caller
 *
 * NOTE(review): the return value of ompi_datatype_copy_content_same_ddt is discarded.
 */
static inline void mca_coll_solo_copy(void *source, void *target, struct ompi_datatype_t *dtype, int count, ptrdiff_t extent) {
    if (!ompi_datatype_is_predefined(dtype)) {
        ompi_datatype_copy_content_same_ddt(dtype, count, target, source);
        return;
    }

    memcpy(target, source, count * extent);
}

END_C_DECLS
#endif /* MCA_COLL_SOLO_EXPORT_H */
Loading