Skip to content

Commit

Permalink
Update for HPC-X packaging changes and bump default version to 2.19 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
samcmill authored Jun 6, 2024
1 parent d893840 commit 61175ab
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 48 deletions.
3 changes: 2 additions & 1 deletion docs/building_blocks.md
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,7 @@ __Parameters__


- __buildlabel__: The build label assigned by Mellanox to the tarball.
For versions 2.17 and later, the default value is `cuda12`.
For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`.
For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`.
For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`.
Expand Down Expand Up @@ -1543,7 +1544,7 @@ distributions the default values are `bzip2`, `numactl-libs`,
`/usr/local/hpcx`.

- __version__: The version of Mellanox HPC-X to install. The default
value is `2.16`.
value is `2.19`.

__Examples__

Expand Down
13 changes: 9 additions & 4 deletions hpccm/building_blocks/hpcx.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig,
# Parameters
buildlabel: The build label assigned by Mellanox to the tarball.
For versions 2.17 and later, the default value is `cuda12`.
For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`.
For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`.
For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`.
Expand Down Expand Up @@ -117,7 +118,7 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig,
`/usr/local/hpcx`.
version: The version of Mellanox HPC-X to install. The default
value is `2.16`.
value is `2.19`.
# Examples
Expand Down Expand Up @@ -146,13 +147,15 @@ def __init__(self, **kwargs):
self.__ospackages = kwargs.get('ospackages', []) # Filled in by _distro()
self.__packages = kwargs.get('packages', [])
self.__prefix = kwargs.get('prefix', '/usr/local/hpcx')
self.__version = kwargs.get('version', '2.16')
self.__version = kwargs.get('version', '2.19')

self.__commands = [] # Filled in by __setup()
self.__wd = kwargs.get('wd', hpccm.config.g_wd) # working directory

if not self.__buildlabel:
if Version(self.__version) >= Version('2.16'):
if Version(self.__version) >= Version('2.17'):
self.__buildlabel = 'cuda12'
elif Version(self.__version) >= Version('2.16'):
self.__buildlabel = 'cuda12-gdrcopy2-nccl2.18'
elif Version(self.__version) >= Version('2.15'):
self.__buildlabel = 'cuda12-gdrcopy2-nccl2.17'
Expand Down Expand Up @@ -218,7 +221,9 @@ def __distro(self):

elif hpccm.config.g_linux_distro == linux_distro.CENTOS:
if not self.__oslabel:
if hpccm.config.g_linux_version >= Version('8.0'):
if hpccm.config.g_linux_version >= Version('9.0'):
self.__oslabel = 'redhat9'
elif hpccm.config.g_linux_version >= Version('8.0'):
if Version(self.__version) >= Version('2.10'):
self.__oslabel = 'redhat8'
else:
Expand Down
86 changes: 43 additions & 43 deletions test/test_hpcx.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,37 +31,14 @@ def setUp(self):
"""Disable logging output messages"""
logging.disable(logging.ERROR)

@x86_64
@ubuntu18
@docker
def test_defaults_ubuntu18(self):
"""Default hpcx building block"""
h = hpcx()
self.assertEqual(str(h),
r'''# Mellanox HPC-X version 2.16
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
bzip2 \
libnuma1 \
openssh-client \
tar \
wget && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \
echo "hpcx_load" >> /etc/bash.bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64''')

@x86_64
@ubuntu20
@docker
def test_defaults_ubuntu20(self):
"""Default hpcx building block"""
h = hpcx()
self.assertEqual(str(h),
r'''# Mellanox HPC-X version 2.16
r'''# Mellanox HPC-X version 2.19
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
bzip2 \
Expand All @@ -70,12 +47,12 @@ def test_defaults_ubuntu20(self):
tar \
wget && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \
echo "hpcx_load" >> /etc/bash.bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64''')
rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64''')

@x86_64
@centos
Expand All @@ -84,20 +61,20 @@ def test_defaults_centos7(self):
"""Default hpcx building block"""
h = hpcx()
self.assertEqual(str(h),
r'''# Mellanox HPC-X version 2.16
r'''# Mellanox HPC-X version 2.19
RUN yum install -y \
bzip2 \
numactl-libs \
openssh-clients \
tar \
wget && \
rm -rf /var/cache/yum/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bashrc && \
echo "hpcx_load" >> /etc/bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64''')
rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64''')

@x86_64
@centos8
Expand All @@ -106,20 +83,20 @@ def test_defaults_centos8(self):
"""Default hpcx building block"""
h = hpcx()
self.assertEqual(str(h),
r'''# Mellanox HPC-X version 2.16
r'''# Mellanox HPC-X version 2.19
RUN yum install -y \
bzip2 \
numactl-libs \
openssh-clients \
tar \
wget && \
rm -rf /var/cache/yum/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bashrc && \
echo "hpcx_load" >> /etc/bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64''')
rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64''')

@x86_64
@ubuntu
Expand Down Expand Up @@ -283,6 +260,29 @@ def test_ldconfig_multi_thread(self):
PKG_CONFIG_PATH=/usr/local/hpcx/hcoll/lib/pkgconfig:/usr/local/hpcx/ompi/lib/pkgconfig:/usr/local/hpcx/sharp/lib/pkgconfig:/usr/local/hpcx/ucx/mt/lib/pkgconfig:$PKG_CONFIG_PATH \
SHMEM_HOME=/usr/local/hpcx/ompi''')

@x86_64
@ubuntu18
@docker
def test_version216_ubuntu18(self):
"""hpcx building block with version 2.16 specified explicitly"""
h = hpcx(version='2.16')
self.assertEqual(str(h),
r'''# Mellanox HPC-X version 2.16
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
bzip2 \
libnuma1 \
openssh-client \
tar \
wget && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \
echo "hpcx_load" >> /etc/bash.bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64''')

@x86_64
@ubuntu22
@docker
Expand All @@ -291,7 +291,7 @@ def test_runtime(self):
h = hpcx()
r = h.runtime()
self.assertEqual(r,
r'''# Mellanox HPC-X version 2.16
r'''# Mellanox HPC-X version 2.19
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
bzip2 \
Expand All @@ -300,9 +300,9 @@ def test_runtime(self):
tar \
wget && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \
RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz && \
mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz -C /var/tmp -j && \
cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64 /usr/local/hpcx && \
echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \
echo "hpcx_load" >> /etc/bash.bashrc && \
rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64''')
rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64''')

0 comments on commit 61175ab

Please sign in to comment.