26 commits
bd3fe97  First prototype for windows and shared memory (hmenke, Sep 26, 2023)
68de45c  Add simple shared array test (hmenke, Sep 28, 2023)
94cdbdc  Add complicated distributed shared array test (hmenke, Oct 3, 2023)
4b70d82  Cover some more MPI_Win_* API surface (hmenke, Oct 9, 2023)
c68c657  Expand API surface and test cases (hmenke, Feb 22, 2025)
31014b3  Add missing headers (hmenke, Feb 25, 2025)
6660eb1  shared communicator constructors (Mobellaaj, Mar 11, 2025)
52cc2ca  Inherit communicator constructors and pass by ref to window (hmenke, Mar 11, 2025)
8209bd3  Store communicator inside the window and remove std::span (hmenke, Apr 13, 2025)
e95f8e7  Rename member variables in mpi::communicator (Thoemi09, May 7, 2025)
a6ce885  Clean up and minor code simplifications in communicator.hpp (Thoemi09, May 7, 2025)
128f604  [doc] Update docs in communicator.hpp (Thoemi09, May 8, 2025)
a22aa0d  Clean up and minor code simplifications in group.hpp (Thoemi09, May 8, 2025)
38f6f1c  [doc] Update docs in group.hpp (Thoemi09, May 8, 2025)
17e973f  Update tests for communicator objects (Thoemi09, May 8, 2025)
e1aec98  Rename member variables in mpi::group (Thoemi09, May 8, 2025)
034769f  Add tests for group objects (Thoemi09, May 8, 2025)
3c1da9a  Rename member variables in mpi::window (Thoemi09, May 8, 2025)
f1e9978  Clean up constructors, destructor and conversion functions in mpi::wi… (Thoemi09, May 8, 2025)
f3fcaba  Clean up synchronization ops in mpi::window (Thoemi09, May 8, 2025)
83a5fde  Clean up get and put methods in mpi::window (Thoemi09, May 8, 2025)
9cc887e  Clean up getter methods in mpi::window (Thoemi09, May 8, 2025)
7a956e0  Remove data() getters in mpi::window (Thoemi09, May 8, 2025)
d2559e0  Clean up mpi::shared_window (Thoemi09, May 8, 2025)
5182735  Clean up in mpi_window.cpp (Thoemi09, May 8, 2025)
ca9026b  Add a multi-node CI job for MPI Shared Memory (hmenke, Jul 10, 2025)
88 changes: 88 additions & 0 deletions .github/workflows/build_multi_node.yml
@@ -0,0 +1,88 @@
name: build

on:
  push:
    branches: [ unstable ]
  pull_request:
    branches: [ unstable ]
  workflow_call:
  workflow_dispatch:

env:
  CMAKE_C_COMPILER_LAUNCHER: ccache
  CMAKE_CXX_COMPILER_LAUNCHER: ccache
  CCACHE_COMPILERCHECK: content
  CCACHE_BASEDIR: ${{ github.workspace }}
  CCACHE_DIR: ${{ github.workspace }}/.ccache
  CCACHE_MAXSIZE: 500M
  CCACHE_SLOPPINESS: pch_defines,time_macros,include_file_mtime,include_file_ctime
  CCACHE_COMPRESS: "1"
  CCACHE_COMPRESSLEVEL: "1"

jobs:
  build_multi_node:

    strategy:
      fail-fast: false

    runs-on: ubuntu-24.04

    steps:
      - uses: actions/checkout@v4

      - uses: actions/cache/restore@v4
        with:
          path: ${{ env.CCACHE_DIR }}
          key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }}
          restore-keys:
            ccache-${{ matrix.os }}-${{ matrix.cc }}-

      - name: Install ubuntu dependencies
        run: >
          sudo apt-get update && sudo apt-get install ccache

      - name: Build and start Docker Compose
        run: |
          docker compose build
          docker compose up -d
        working-directory: .github/workflows/docker

      - name: Compile MPI inside the container
        run: |
          docker exec -t -u runner -w ${{ github.workspace }} docker-vm-1 /bin/bash -euxc '
          cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/install -DBuild_Documentation=Off
          cmake --build build/ -j2
          '

      - name: Run single-node tests inside the container
        run: |
          docker exec -t -u runner -w ${{ github.workspace }} docker-vm-1 /bin/bash -euxc '
          export CTEST_OUTPUT_ON_FAILURE=1
          cmake --build build/ --target test
          '

      - name: Run multi-node tests inside the container
        run: |
          docker exec -t -u runner -w ${{ github.workspace }} docker-vm-1 /bin/bash -euxc '
          cat <<EOF | tee hostfile
          docker-vm-1 slots=3
          docker-vm-2 slots=1
          docker-vm-3 slots=2
          EOF

          # Test the communication
          mpirun -hostfile ./hostfile /bin/bash -c "env | grep \"^OMPI_COMM_.*_RANK\""

          # Run the actual test
          mpirun -hostfile ./hostfile build/test/c++/mpi_window
          '

      - name: ccache statistics
        if: always()
        run: ccache -sv

      - uses: actions/cache/save@v4
        if: always()
        with:
          path: ${{ env.CCACHE_DIR }}
          key: ccache-${{ matrix.os }}-${{ matrix.cc }}-${{ github.run_id }}
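
Note: the multi-node step above drives build/test/c++/mpi_window across the three containers via the hostfile. For orientation, here is a minimal, self-contained sketch of the shared-memory mechanism such a test exercises, written directly against the MPI-3 C API. This is illustration only; it deliberately does not use the PR's mpi::window / mpi::shared_window wrappers, whose exact interface is not shown in this diff, and the file name is hypothetical.

// shared_array_sketch.cpp -- plain MPI-3 illustration (not part of this PR).
// Build: mpicxx shared_array_sketch.cpp -o shared_array_sketch
// Run:   mpirun -n 4 ./shared_array_sketch
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int world_rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

  // Group the ranks of one shared-memory island (one island per node/container).
  MPI_Comm node_comm = MPI_COMM_NULL;
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm);
  int node_rank = 0, node_size = 0;
  MPI_Comm_rank(node_comm, &node_rank);
  MPI_Comm_size(node_comm, &node_size);

  // Rank 0 of each island allocates one int per local rank; the other ranks attach.
  MPI_Win win         = MPI_WIN_NULL;
  int *base           = nullptr;
  MPI_Aint alloc_size = (node_rank == 0 ? node_size : 0) * sizeof(int);
  MPI_Win_allocate_shared(alloc_size, sizeof(int), MPI_INFO_NULL, node_comm, &base, &win);

  // Query a local pointer to rank 0's segment and write our own slot.
  MPI_Aint seg_size = 0;
  int disp_unit     = 0;
  int *shared       = nullptr;
  MPI_Win_shared_query(win, 0, &seg_size, &disp_unit, &shared);
  MPI_Win_lock_all(0, win);
  shared[node_rank] = world_rank;
  MPI_Win_unlock_all(win);
  MPI_Barrier(node_comm);

  if (node_rank == 0)
    for (int i = 0; i < node_size; ++i) std::printf("slot %d -> world rank %d\n", i, shared[i]);

  MPI_Win_free(&win);
  MPI_Comm_free(&node_comm);
  MPI_Finalize();
}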
26 changes: 26 additions & 0 deletions .github/workflows/docker/Dockerfile
@@ -0,0 +1,26 @@
FROM ubuntu:24.04
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
       ca-certificates \
       build-essential \
       ccache \
       cmake \
       g++ \
       git \
       libopenmpi-dev \
       openmpi-bin \
       openssh-server \
    && rm -rf /var/cache/apt /var/lib/apt/lists
RUN useradd -m runner \
    && passwd -d runner \
    && mkdir -pm0755 /run/sshd \
    && echo "PermitRootLogin yes" >> /etc/ssh/sshd_config.d/99-insecure.conf \
    && echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config.d/99-insecure.conf \
    && echo "PermitEmptyPasswords yes" >> /etc/ssh/sshd_config.d/99-insecure.conf \
    && echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config.d/99-insecure.conf \
    && echo "LogLevel ERROR" >> /etc/ssh/ssh_config.d/99-insecure.conf
ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
ENV OMPI_MCA_osc=sm,pt2pt
ENV OMPI_MCA_rmaps_base_oversubscribe=yes
CMD ["/usr/sbin/sshd", "-D"]
20 changes: 20 additions & 0 deletions .github/workflows/docker/docker-compose.yml
@@ -0,0 +1,20 @@
services:
  vm:
    build: .
    deploy:
      mode: replicated
      replicas: 3
    environment:
      CMAKE_C_COMPILER_LAUNCHER: ${CMAKE_C_COMPILER_LAUNCHER:-ccache}
      CMAKE_CXX_COMPILER_LAUNCHER: ${CMAKE_CXX_COMPILER_LAUNCHER:-ccache}
      CCACHE_COMPILERCHECK: ${CCACHE_COMPILERCHECK:-content}
      CCACHE_BASEDIR: ${CCACHE_BASEDIR}
      CCACHE_DIR: ${CCACHE_DIR:-${CCACHE_BASEDIR}/.ccache}
      CCACHE_MAXSIZE: ${CCACHE_MAXSIZE:-500M}
      CCACHE_SLOPPINESS: ${CCACHE_SLOPPINESS:-pch_defines,time_macros,include_file_mtime,include_file_ctime}
      CCACHE_COMPRESS: ${CCACHE_COMPRESS:-1}
      CCACHE_COMPRESSLEVEL: ${CCACHE_COMPRESSLEVEL:-1}
    stdin_open: true
    tty: true
    volumes:
      - ${CCACHE_BASEDIR}:${CCACHE_BASEDIR}:z
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ compile_commands.json
 doc/_autosummary
 doc/cpp2rst_generated
 doc/html
+build/
143 changes: 92 additions & 51 deletions c++/mpi/communicator.hpp
@@ -31,98 +31,111 @@

 namespace mpi {

+  // Forward declaration.
+  class shared_communicator;
+
   /**
    * @ingroup mpi_essentials
    * @brief C++ wrapper around `MPI_Comm` providing various convenience functions.
    *
-   * @details It stores an `MPI_Comm` object as its only member which by default is set to `MPI_COMM_WORLD`.
-   * Note that copying the communicator simply copies the `MPI_Comm` object, without calling `MPI_Comm_dup`.
+   * @details It stores an `MPI_Comm` object as its only member which by default is set to `MPI_COMM_WORLD`. The
+   * underlying `MPI_Comm` object is not freed when a communicator goes out of scope. It is the user's responsibility
+   * to free it, in case that is needed. Note that copying the communicator simply copies the `MPI_Comm` object,
+   * without calling `MPI_Comm_dup`.
    *
    * All functions that make direct calls to the MPI C library throw an exception in case the call fails.
    */
   class communicator {
-    // Wrapped `MPI_Comm` object.
-    MPI_Comm _com = MPI_COMM_WORLD;
-
     public:
     /// Construct a communicator with `MPI_COMM_WORLD`.
     communicator() = default;

     /**
      * @brief Construct a communicator with a given `MPI_Comm` object.
      * @details The `MPI_Comm` object is copied without calling `MPI_Comm_dup`.
      * @param c `MPI_Comm` object to wrap.
      */
-    communicator(MPI_Comm c) : _com(c) {}
+    communicator(MPI_Comm c) : com_(c) {}

     /// Get the wrapped `MPI_Comm` object.
-    [[nodiscard]] MPI_Comm get() const noexcept { return _com; }
+    [[nodiscard]] MPI_Comm get() const noexcept { return com_; }
+
+    /// Check if the contained `MPI_Comm` is `MPI_COMM_NULL`.
+    [[nodiscard]] bool is_null() const noexcept { return com_ == MPI_COMM_NULL; }

     /**
      * @brief Get the rank of the calling process in the communicator.
      * @return The result of `MPI_Comm_rank` if mpi::has_env is true, otherwise 0.
      */
     [[nodiscard]] int rank() const {
-      if (has_env) {
-        int num = 0;
-        check_mpi_call(MPI_Comm_rank(_com, &num), "MPI_Comm_rank");
-        return num;
-      } else
-        return 0;
+      int r = 0;
+      if (has_env) check_mpi_call(MPI_Comm_rank(com_, &r), "MPI_Comm_rank");
+      return r;
     }

     /**
      * @brief Get the size of the communicator.
      * @return The result of `MPI_Comm_size` if mpi::has_env is true, otherwise 1.
      */
     [[nodiscard]] int size() const {
-      if (has_env) {
-        int num = 0;
-        check_mpi_call(MPI_Comm_size(_com, &num), "MPI_Comm_size");
-        return num;
-      } else
-        return 1;
+      int s = 1;
+      if (has_env) check_mpi_call(MPI_Comm_size(com_, &s), "MPI_Comm_size");
+      return s;
     }

     /**
      * @brief Split the communicator into disjoint subgroups.
      *
-     * @details Calls `MPI_Comm_split` with the given color and key arguments. See the MPI documentation for more details,
-     * e.g. <a href="https://docs.open-mpi.org/en/v5.0.x/man-openmpi/man3/MPI_Comm_split.3.html">open-mpi docs</a>.
+     * @details Calls `MPI_Comm_split` with the given color and key arguments. See the MPI documentation for more
+     * details, e.g. <a href="https://docs.open-mpi.org/en/v5.0.x/man-openmpi/man3/MPI_Comm_split.3.html">open-mpi
+     * docs</a>.
      *
-     * @warning This allocates a new communicator object. Make sure to call `free` on the returned communicator when
-     * it is no longer needed.
+     * @warning This allocates a new communicator object. Make sure to call free() on the returned communicator when it
+     * is no longer needed.
      *
      * @param color Determines which processes are put into the same group.
      * @param key Determines the rank of the process in the new communicator.
      * @return If mpi::has_env is true, return the split `MPI_Comm` object wrapped in a new mpi::communicator, otherwise
      * return a default constructed mpi::communicator.
      */
     [[nodiscard]] communicator split(int color, int key = 0) const {
-      if (has_env) {
-        communicator c;
-        check_mpi_call(MPI_Comm_split(_com, color, key, &c._com), "MPI_Comm_split");
-        return c;
-      } else
-        return {};
+      communicator c{};
+      if (has_env) check_mpi_call(MPI_Comm_split(com_, color, key, &c.com_), "MPI_Comm_split");
+      return c;
     }

+    /**
+     * @brief Partition the communicator into subcommunicators according to their type.
+     *
+     * @details In the MPI 3.0 standard the only supported split type is `MPI_COMM_TYPE_SHARED`. OpenMPI (and possibly
+     * other implementations) provide more custom split types, however, they are not portable.
+     *
+     * @warning This allocates a new communicator object. Make sure to call free on the returned communicator when it
+     * is no longer needed.
+     *
+     * @param split_type Type of processes to be grouped together.
+     * @param key Determines the rank of the process in the new communicator.
+     * @return If mpi::has_env is true, return the split `MPI_Comm` object wrapped in a new mpi::communicator, otherwise
+     * return a default constructed mpi::communicator.
+     */
+    [[nodiscard]] shared_communicator split_shared(int split_type = MPI_COMM_TYPE_SHARED, int key = 0) const;
+
     /**
      * @brief Duplicate the communicator.
      *
      * @details Calls `MPI_Comm_dup` to duplicate the communicator. See the MPI documentation for more details, e.g.
      * <a href="https://docs.open-mpi.org/en/v5.0.x/man-openmpi/man3/MPI_Comm_dup.3.html">open-mpi docs</a>.
      *
-     * @warning This allocates a new communicator object. Make sure to call `free` on the returned communicator when
-     * it is no longer needed.
+     * @warning This allocates a new communicator object. Make sure to call free on the returned communicator when it
+     * is no longer needed.
      *
      * @return If mpi::has_env is true, return the duplicated `MPI_Comm` object wrapped in a new mpi::communicator,
      * otherwise return a default constructed mpi::communicator.
      */
     [[nodiscard]] communicator duplicate() const {
-      if (has_env) {
-        communicator c;
-        check_mpi_call(MPI_Comm_dup(_com, &c._com), "MPI_Comm_dup");
-        return c;
-      } else
-        return {};
+      communicator c{};
+      if (has_env) check_mpi_call(MPI_Comm_dup(com_, &c.com_), "MPI_Comm_dup");
+      return c;
     }

     /**
@@ -135,18 +148,19 @@ namespace mpi {
      * Does nothing, if mpi::has_env is false.
      */
     void free() {
-      if (has_env) { check_mpi_call(MPI_Comm_free(&_com), "MPI_Comm_free"); }
+      if (has_env && !is_null()) check_mpi_call(MPI_Comm_free(&com_), "MPI_Comm_free");
     }

     /**
-     * @brief If mpi::has_env is true, `MPI_Abort` is called with the given error code, otherwise std::abort is called.
+     * @brief If mpi::has_env is true, `MPI_Abort` is called with the given error code, otherwise it calls `std::abort`.
      * @param error_code The error code to pass to `MPI_Abort`.
      */
     void abort(int error_code) const {
-      if (has_env)
-        check_mpi_call(MPI_Abort(_com, error_code), "MPI_Abort");
-      else
+      if (has_env) {
+        check_mpi_call(MPI_Abort(com_, error_code), "MPI_Abort");
+      } else {
         std::abort();
+      }
     }

 #ifdef BOOST_MPI_HPP
@@ -158,33 +172,60 @@ namespace mpi {
     /**
      * @brief Barrier synchronization.
      *
-     * @details Does nothing if mpi::has_env is false. Otherwise, it either uses a blocking `MPI_Barrier`
-     * (if the given argument is 0) or a non-blocking `MPI_Ibarrier` call. The given parameter determines
-     * in milliseconds how often each process calls `MPI_Test` to check if all processes have reached the barrier.
+     * @details Does nothing if mpi::has_env is false. Otherwise, it either uses a blocking `MPI_Barrier` (if the given
+     * argument is 0) or a non-blocking `MPI_Ibarrier` call. The given parameter determines in milliseconds how often
+     * each process calls `MPI_Test` to check if all processes have reached the barrier.
      *
      * This can considerably reduce the CPU load:
      * - 1 msec ~ 1% cpu load
      * - 10 msec ~ 0.5% cpu load
      * - 100 msec ~ 0.01% cpu load
      *
      * For a very unbalanced load that takes a long time to finish, 1000 msec is a good choice.
      *
-     * @param poll_msec The polling interval in milliseconds. If set to 0, a simple `MPI_Barrier` call is used.
+     * @param poll_msec Polling interval in milliseconds. If set to 0, a simple `MPI_Barrier` call is used.
      */
     void barrier(long poll_msec = 1) const {
       if (has_env) {
         if (poll_msec == 0) {
-          check_mpi_call(MPI_Barrier(_com), "MPI_Barrier");
+          check_mpi_call(MPI_Barrier(com_), "MPI_Barrier");
         } else {
           MPI_Request req{};
           int flag = 0;
-          check_mpi_call(MPI_Ibarrier(_com, &req), "MPI_Ibarrier");
+          check_mpi_call(MPI_Ibarrier(com_, &req), "MPI_Ibarrier");
           while (!flag) {
             check_mpi_call(MPI_Test(&req, &flag, MPI_STATUS_IGNORE), "MPI_Test");
             usleep(poll_msec * 1000);
           }
         }
       }
     }
+
+    private:
+    MPI_Comm com_ = MPI_COMM_WORLD;
   };

+  /**
+   * @ingroup mpi_osc_shm
+   * @brief C++ wrapper around an `MPI_Comm` that is the result of the mpi::communicator::split_shared operation.
+   *
+   * @details The plain MPI C API does not distinguish whether an `MPI_Comm` is local to a shared-memory island or
+   * not. Thus we introduce an extra type whose only purpose is to make that distinction at the type level, to prevent
+   * wrong usage of the shared-memory APIs.
+   */
+  class shared_communicator : public communicator {
+    public:
+    // Make the constructors of mpi::communicator accessible.
+    using communicator::communicator;
+
+    /// Construct a shared communicator with `MPI_COMM_NULL`.
+    shared_communicator() : communicator(MPI_COMM_NULL) {}
+  };
+
+  [[nodiscard]] inline shared_communicator communicator::split_shared(int split_type, int key) const {
+    shared_communicator c{};
+    if (has_env) check_mpi_call(MPI_Comm_split_type(com_, split_type, key, MPI_INFO_NULL, &c.com_), "MPI_Comm_split_type");
+    return c;
+  }
+
 } // namespace mpi
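
For reference, a hypothetical usage sketch of the communicator API above. Two assumptions not taken from this diff: the include path of the installed header, and that MPI is initialized directly with MPI_Init rather than through any environment helper the library may provide.

// communicator_sketch.cpp -- hypothetical usage of the API above (not part of this PR).
#include <mpi/communicator.hpp> // include path is an assumption; adjust to your installation
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv); // run under mpirun so that mpi::has_env is true
  {
    mpi::communicator world;                              // wraps MPI_COMM_WORLD by default
    mpi::shared_communicator node = world.split_shared(); // one subcommunicator per shared-memory island
    std::printf("world %d/%d -> node %d/%d\n", world.rank(), world.size(), node.rank(), node.size());
    world.barrier(100); // non-blocking barrier, polling MPI_Test every 100 ms to keep the CPU load low
    node.free();        // split_shared() allocates a communicator; free() is a no-op on MPI_COMM_NULL
  }
  MPI_Finalize();
}

The type-level split pays off here: an API that requires a shared-memory island (such as the shared window allocation this PR adds) can take an mpi::shared_communicator and thereby reject a plain mpi::communicator at compile time.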
1 change: 1 addition & 0 deletions c++/mpi/datatypes.hpp
@@ -32,6 +32,7 @@
 #include <tuple>
 #include <type_traits>
 #include <utility>
+#include <vector>

 namespace mpi {
