-
Notifications
You must be signed in to change notification settings - Fork 11
Add OpenMPI host injection script #53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f3e3f26
00bf9de
9f0b83f
ffa761e
08547bb
347bef1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# CI check for scripts/mpi_support/install_openmpi_host_injection.sh:
# injects the OpenMPI 4.1.6 libraries from EESSI as if they were the host MPI,
# then runs an OSU benchmark built against an older OpenMPI to confirm that the
# injected libraries are picked up and work.
name: Test for mpi injection script
on:
  pull_request:
  workflow_dispatch:
permissions:
  contents: read # to fetch code (actions/checkout)
jobs:
  build:
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
      - uses: eessi/github-action-eessi@v3

      - name: Inject OpenMPI libs
        run: |
          # Needed dir for the injected libs
          mkdir /opt/eessi

          # Load last OpenMPI from 2023b and declaring var for injection path
          module load OpenMPI/4.1.6-GCC-13.2.0
          export OPENMPI_TO_INJECT=$EBROOTOPENMPI
          module purge

          # Inject script
          ./scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path "$OPENMPI_TO_INJECT"

      - name: Check with OSU
        run: |
          # Load OSU version that uses older OpenMPI
          module load OSU-Micro-Benchmarks/7.1-1-gompi-2023a

          # Verify injected libs are being used
          ldd "$(which osu_latency)" | grep host_injections

          # Check it works correctly.
          # The step shell runs with `-e`, so the exit status has to be captured
          # with `||`; otherwise a failing mpirun would abort the step before
          # the message below is printed.
          output=0
          mpirun -n 2 osu_latency || output=$?
          if [ "$output" -ne 0 ]; then
            echo "MPI injection failed, check the logs for more detail"
            exit "$output"
          else
            echo "MPI injection succeeded!"
          fi
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,296 @@ | ||
#!/usr/bin/env bash | ||
|
||
# This script can be used to install the host MPI libraries under the `.../host_injections` directory. | ||
# It allows EESSI software to use the MPI stack from the host. | ||
# | ||
# The `host_injections` directory is a variant symlink that by default points to | ||
# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see | ||
# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the | ||
# installation to be successful, this directory needs to be writeable by the user | ||
# executing this script. | ||
|
||
# Initialise our bash functions
# TOPDIR is the directory containing this script (resolved with realpath).
# utils.sh is loaded from its parent directory; presumably it provides the
# echo_* / fatal_error / check_eessi_initialised helpers used below.
# All expansions are quoted so a checkout path containing whitespace works.
TOPDIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
source "${TOPDIR}/../utils.sh"
|
||
|
||
# Print the usage summary (one line per option) on stdout.
# Arguments: none; $0 is used as the program name in the first line.
show_help() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "Options:" \
    " --help Display this help message" \
    " --mpi-path /path/to/mpi Specify the path to the MPI host installation (Required)" \
    " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" \
    " storage during the mpi injection" \
    " --noclean Do not remove the temporary directory and the host injected libraries after finishing injection" \
    " --force Force MPI injection even if it is already done"
}
|
||
|
||
#######################################
# Parse the command-line options and publish the result as read-only globals.
# Globals:
#   MPI_PATH (written) - value given to --mpi-path (required)
#   TEMP_DIR (written) - value given to -t/--temp-dir; only set when the option
#                        is present
#   CLEAN    (written) - "false" after --noclean; otherwise an already set
#                        value is kept, falling back to "true"
#   FORCE    (written) - "true" after --force; otherwise an already set value
#                        is kept, falling back to "false"
# Arguments: the script's command-line arguments
# Exits:     0 after --help;
#            1 on an unknown option, on an option that lacks its value, or
#            when no MPI path was provided
#######################################
parse_cmdline() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help)
        show_help
        exit 0
        ;;
      --mpi-path)
        if [[ -n "${2:-}" ]]; then
          readonly MPI_PATH="$2"
          shift 2
        else
          echo_red "Error: Argument required for $1"
          show_help
          exit 1
        fi
        ;;
      -t | --temp-dir)
        if [[ -n "${2:-}" ]]; then
          readonly TEMP_DIR="$2"
          shift 2
        else
          echo_red "Error: Argument required for $1"
          show_help
          exit 1
        fi
        ;;
      --noclean)
        CLEAN=false
        shift 1
        ;;
      --force)
        FORCE=true
        shift 1
        ;;
      *)
        echo_red "Error: Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done

  # The MPI path is mandatory: its absence is a usage error, so callers and CI
  # must see a failing exit status rather than success.
  if [[ -z "${MPI_PATH:-}" ]]; then
    echo_yellow "MPI path was not specified and it is required"
    show_help
    exit 1
  fi

  readonly CLEAN="${CLEAN:-true}"
  readonly FORCE="${FORCE:-false}"
}
|
||
|
||
# ****Warning: patchelf v0.18.0 (currently shipped with EESSI) does not work.****
# We get v0.17.2
#######################################
# Download the patchelf release tarball from GitHub and unpack it into
# <dir>/patchelf.
# Globals:
#   EESSI_CPU_FAMILY (read)    - CPU family that selects the release tarball
#   PATCHELF_BIN     (written) - path of the unpacked patchelf binary; only
#                                set when every step succeeded
# Arguments: $1 - existing directory used for temporary storage
# Outputs:   error messages on stderr
# Returns:   0 on success, 1 when the directory cannot be created, the
#            download or extraction fails, or the binary is missing
#######################################
download_patchelf() {
  # Temporary directory to save patchelf
  local tmpdir=$1

  local patchelf_version="0.17.2"
  local -a curl_opts=(-L --silent --show-error --fail)

  local url
  url="https://github.com/NixOS/patchelf/releases/download/${patchelf_version}/"
  url+="patchelf-${patchelf_version}-${EESSI_CPU_FAMILY}.tar.gz"

  local patchelf_path="${tmpdir}/patchelf"
  local tarball="${patchelf_path}/patchelf.tar.gz"
  local patchelf_bin="${patchelf_path}/bin/patchelf"

  # -p: the directory may already exist when a temporary directory is reused
  if ! mkdir -p "${patchelf_path}"; then
    echo "Could not create directory ${patchelf_path}" >&2
    return 1
  fi

  if ! curl "${curl_opts[@]}" -o "${tarball}" "${url}"; then
    echo "Could not download patchelf from ${url}" >&2
    return 1
  fi

  if ! tar -xf "${tarball}" -C "${patchelf_path}"; then
    echo "Could not extract ${tarball}" >&2
    return 1
  fi

  if [[ ! -x "${patchelf_bin}" ]]; then
    echo "patchelf binary not found at ${patchelf_bin}" >&2
    return 1
  fi

  PATCHELF_BIN=${patchelf_bin}
}
|
||
# Flag shared between inject_mpi (writer) and main (reader): set to 1 when one
# of the inspected libraries lists libcuda.so as a dependency.
# This only applies to MPI=>5
libcuda_exists=0

#######################################
# Copy the MPI libraries found under MPI_PATH (plus the libfabric and libpmix
# they depend on) into the host_injections rpath_overrides directory, and patch
# them with patchelf so their dependencies resolve from that directory.
# Globals:
#   MPI_PATH            (read)    - root of the MPI installation to inject
#   FORCE               (read)    - "true": replace an existing injection
#   CLEAN               (read)    - "true": remove the injected *.so* files
#                                   when the sanity check fails
#   EESSI_EPREFIX       (read)    - prefix holding the EESSI ldd
#   EESSI_SOFTWARE_PATH (read)    - used to derive the host_injections path
#   PATCHELF_BIN        (read)    - patchelf binary to use
#   libcuda_exists      (written) - set to 1 when libcuda.so is a dependency
# Arguments: $1 - existing directory used for temporary storage
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   0 when the injected libraries pass the sanity check, or when a
#            previous injection exists and FORCE is not "true"; 1 otherwise
#######################################
inject_mpi() {
  # Temporary directory for injection
  local tmpdir=$1
  local timestamp
  timestamp=$(date +%Y%m%d%H%M%S)

  local eessi_ldd="${EESSI_EPREFIX}/usr/bin/ldd"
  local system_ldd="/usr/bin/ldd"

  local host_injection_mpi_path
  host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections}
  host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib"

  # A non-empty target directory means a previous injection exists
  if [[ -d "${host_injection_mpi_path}" ]] \
    && [[ -n "$(ls -A "${host_injection_mpi_path}")" ]]; then
    echo "MPI was already injected"
    if ${FORCE}; then
      echo "Forcing new MPI injection"
      # Keep the previous injection next to the new one, tagged with a timestamp
      mv "${host_injection_mpi_path}" "${host_injection_mpi_path}-${timestamp}"
      echo "Previous injection saved on" "${host_injection_mpi_path}-${timestamp}"
    else
      return 0
    fi
  fi

  mkdir -p "${host_injection_mpi_path}"

  # Staging area: libraries are patched here and only copied to the final
  # location once they pass the sanity check.
  # -p: the directory may already exist when a temporary directory is reused
  local temp_inject_path="${tmpdir}/mpi_inject"
  mkdir -p "${temp_inject_path}"

  # Get all library files from openmpi dir
  # NOTE(review): this copies every *.so* file below MPI_PATH. For an OS
  # installed OpenMPI whose libraries share a directory with other system
  # libraries this would also pick up unrelated libraries; restricting the
  # search to OpenMPI's own libraries is an open review point.
  find "${MPI_PATH}" -type f -name "*.so*" -exec cp {} "${temp_inject_path}" \;

  # Copy library links to host injection path
  find "${MPI_PATH}" -type l -name "*.so*" -exec cp -P {} "${host_injection_mpi_path}" \;

  # Get MPI libs dependencies from system ldd
  local libname libpath libdir depname deppath dlopen
  local -A libs_dict
  local -a dlopen_libs

  # Libraries in sub-directories of MPI_PATH's top-level directories
  # (-mindepth 2); the code below treats them as dlopen'ed components.
  # NOTE(review): how such components/providers are located once the libraries
  # are injected was left unresolved in review.
  # NUL-delimited on both sides so every path becomes exactly one element.
  readarray -d '' -t dlopen_libs < <(find "${MPI_PATH}" -mindepth 2 -name "*.so*" -print0)

  # Get all library names and paths in associative array
  # If library is libfabric, libpmix, or from the MPI path
  # modify libpath in assoc array to point to host_injection_mpi_path
  while read -r libname libpath; do

    if [[ ${libname} =~ libfabric\.so ]] && [[ ! -f "${temp_inject_path}/${libname}" ]]; then
      libdir="$(dirname "${libpath}")/" # without trailing slash the find does not work
      find "${libdir}" -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} "${temp_inject_path}" \;
      find "${libdir}" -maxdepth 1 -type l -name "libfabric.so*" -exec cp -P {} "${host_injection_mpi_path}" \;

      # Record where the system resolves libfabric's own dependencies
      while read -r depname deppath; do
        libs_dict[${depname}]=${deppath}
      done < <("${system_ldd}" "${libpath}" | awk '/=>/ {print $1, $3}' | sort | uniq)

      libpath="${host_injection_mpi_path}/$(basename "${libpath}")"
    fi

    if [[ ${libname} =~ libpmix\.so ]] && [[ ! -f "${temp_inject_path}/${libname}" ]]; then
      libdir="$(dirname "${libpath}")/" # without trailing slash the find does not work
      find "${libdir}" -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} "${temp_inject_path}" \;
      find "${libdir}" -maxdepth 1 -type l -name "libpmix.so*" -exec cp -P {} "${host_injection_mpi_path}" \;

      libpath="${host_injection_mpi_path}/$(basename "${libpath}")"
    fi

    # Literal substring match: MPI_PATH is a filesystem path, not a regex
    if [[ "${libpath}" == *"${MPI_PATH}"* ]]; then
      libpath="${host_injection_mpi_path}/$(basename "${libpath}")"
    fi

    # Forcing libname to be libcuda, if not it will be "not", as the lib is not found
    # This only applies to MPI=>5
    if [[ ${libname} =~ libcuda\.so ]]; then
      export libcuda_exists=1
      libs_dict[${libname}]="libcuda.so"
    else
      libs_dict[${libname}]=${libpath}
    fi

  done < <(
    {
      find "${temp_inject_path}" -maxdepth 1 -type f -name "*.so*" -exec "${system_ldd}" {} \;
      for dlopen in "${dlopen_libs[@]}"; do
        "${system_ldd}" "${dlopen}"
      done
    } | awk '/=>/ {print $1, $3}' | sort | uniq
  )

  # Do library injection to openmpi libs, libfabric and libpmix
  local lib dep target
  while read -r lib; do

    # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer)
    # Must be done before the injection of unresolved dependencies
    if [[ ${lib} =~ libfabric\.so ]]; then
      while read -r dep; do
        target=${libs_dict[${dep}]:-}
        if [[ -z "${target}" ]]; then
          # Never write an empty NEEDED entry into the library
          echo "Warning: no known location for ${dep} (needed by ${lib}), skipping" >&2
          continue
        fi
        "${PATCHELF_BIN}" --replace-needed "${dep}" "${target}" "${lib}"
      done < <("${system_ldd}" "${lib}" \
        | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq)
    fi

    # Do injection of unresolved libraries
    "${PATCHELF_BIN}" --set-rpath "${host_injection_mpi_path}" "${lib}"
    while read -r dep; do
      target=${libs_dict[${dep}]:-}
      if [[ -z "${target}" ]]; then
        echo "Warning: no known location for ${dep} (needed by ${lib}), skipping" >&2
        continue
      fi
      # -F: library names contain dots, match them literally
      if "${PATCHELF_BIN}" --print-needed "${lib}" | grep -qF -- "${dep}"; then
        "${PATCHELF_BIN}" --replace-needed "${dep}" "${target}" "${lib}"
      else
        "${PATCHELF_BIN}" --add-needed "${target}" "${lib}"
      fi
    done < <("${eessi_ldd}" "${lib}" | awk '/not found/ {print $1}' | sort | uniq)

    # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so
    if [[ ${lib} =~ libmpi\.so ]]; then
      while read -r dep; do
        target=${libs_dict[${dep}]:-}
        if [[ -z "${target}" ]]; then
          echo "Warning: no known location for ${dep} (needed by ${lib}), skipping" >&2
          continue
        fi
        if ! "${PATCHELF_BIN}" --print-needed "${lib}" | grep -qF -- "${dep}"; then
          "${PATCHELF_BIN}" --add-needed "${target}" "${lib}"
        fi
      done < <(
        for dlopen in "${dlopen_libs[@]}"; do
          "${eessi_ldd}" "${dlopen}"
        done | awk '/not found/ && !/libmpi\.so.*/ {print $1}' | sort | uniq
      )
    fi

  done < <(find "${temp_inject_path}" -type f)

  # Sanity check MPI injection
  # sanity holds the return status: it stays 1 (failure) unless the EESSI ldd
  # can process the patched libraries and reports nothing but libcuda.so as
  # "not found".
  local sanity=1
  if "${eessi_ldd}" "${temp_inject_path}"/* &> /dev/null; then
    cp -t "${host_injection_mpi_path}" "${temp_inject_path}"/*
    # libcuda.so will be always not found
    # This only applies to MPI=>5
    if "${eessi_ldd}" "${temp_inject_path}"/* | grep "not found" | grep -v "libcuda.so" > /dev/null; then
      if ${CLEAN}; then
        rm -f "${host_injection_mpi_path}"/*.so*
      fi
    else
      sanity=0
    fi
  fi

  return ${sanity}
}
|
||
|
||
#######################################
# Entry point: parse the options, prepare the working directories, fetch
# patchelf and run the MPI injection.
# Globals:
#   TEMP_DIR, CLEAN, EESSI_SOFTWARE_PATH, libcuda_exists (read)
# Arguments: the script's command-line arguments
# Outputs:   progress and result messages on stdout
# Returns:   ends through fatal_error (presumably exits non-zero - it is
#            defined outside this file's visible part) when a directory cannot
#            be created, patchelf cannot be fetched, or the injection fails
#######################################
main() {
  parse_cmdline "$@"
  check_eessi_initialised

  # Create directory linked by host_injections
  local inject_dir
  inject_dir=$(readlink -f /cvmfs/software.eessi.io/host_injections)
  if [[ ! -d "${inject_dir}" ]]; then
    mkdir -p "${inject_dir}" \
      || fatal_error "Could not create directory ${inject_dir}"
  fi

  # we need a directory we can use for temporary storage
  local tmpdir
  if [[ -z "${TEMP_DIR:-}" ]]; then
    tmpdir=$(mktemp -d) \
      || fatal_error "Could not create a temporary directory"
  else
    tmpdir="${TEMP_DIR}/temp"
    if ! mkdir -p "${tmpdir}"; then
      fatal_error "Could not create directory ${tmpdir}"
    fi
  fi

  echo "Temporary directory for injection: ${tmpdir}"

  if ! download_patchelf "${tmpdir}"; then
    if ${CLEAN}; then
      rm -rf -- "${tmpdir}"
    fi
    fatal_error "Could not download patchelf"
  fi

  local host_injection_mpi_path
  host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections}
  host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib"

  local injection_failed=0
  if inject_mpi "${tmpdir}"; then
    # This only applies to MPI=>5
    if [[ "${libcuda_exists}" -eq 1 ]]; then
      echo_yellow "The scripts could not find libcuda file, which may cause some issues, please copy it manually using the following command:"
      echo "----------------------"
      echo "cp /path/to/libcuda.so" "${host_injection_mpi_path}"
      echo "----------------------"
    fi
    echo_green "MPI injection was successful"
  else
    injection_failed=1
  fi

  # Clean up before reporting a failure, so the temporary directory is not
  # left behind when the injection fails (unless --noclean was requested)
  if ${CLEAN}; then
    rm -rf -- "${tmpdir}"
  fi

  if [[ "${injection_failed}" -ne 0 ]]; then
    fatal_error "MPI host injection failed"
  fi
}

main "$@"
Uh oh!
There was an error while loading. Please reload this page.