From 61175ab627682b82f054bb857afad12826af95c8 Mon Sep 17 00:00:00 2001 From: Scott McMillan Date: Thu, 6 Jun 2024 09:27:17 -0500 Subject: [PATCH] Update for HPC-X packaging changes and bump default version to 2.19 (#487) --- docs/building_blocks.md | 3 +- hpccm/building_blocks/hpcx.py | 13 ++++-- test/test_hpcx.py | 86 +++++++++++++++++------------------ 3 files changed, 54 insertions(+), 48 deletions(-) diff --git a/docs/building_blocks.md b/docs/building_blocks.md index 326d4d0a..ccea7be2 100644 --- a/docs/building_blocks.md +++ b/docs/building_blocks.md @@ -1475,6 +1475,7 @@ __Parameters__ - __buildlabel__: The build label assigned by Mellanox to the tarball. +For versions 2.17 and later, the default value is `cuda12`. For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`. For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`. For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`. @@ -1543,7 +1544,7 @@ distributions the default values are `bzip2`, `numactl-libs`, `/usr/local/hpcx`. - __version__: The version of Mellanox HPC-X to install. The default -value is `2.16`. +value is `2.19`. __Examples__ diff --git a/hpccm/building_blocks/hpcx.py b/hpccm/building_blocks/hpcx.py index c3473c95..7d99f32b 100644 --- a/hpccm/building_blocks/hpcx.py +++ b/hpccm/building_blocks/hpcx.py @@ -49,6 +49,7 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig, # Parameters buildlabel: The build label assigned by Mellanox to the tarball. + For versions 2.17 and later, the default value is `cuda12`. For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`. For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`. For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`. @@ -117,7 +118,7 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig, `/usr/local/hpcx`. version: The version of Mellanox HPC-X to install. The default - value is `2.16`. + value is `2.19`. # Examples @@ -146,13 +147,15 @@ def __init__(self, **kwargs): self.__ospackages = kwargs.get('ospackages', []) # Filled in by _distro() self.__packages = kwargs.get('packages', []) self.__prefix = kwargs.get('prefix', '/usr/local/hpcx') - self.__version = kwargs.get('version', '2.16') + self.__version = kwargs.get('version', '2.19') self.__commands = [] # Filled in by __setup() self.__wd = kwargs.get('wd', hpccm.config.g_wd) # working directory if not self.__buildlabel: - if Version(self.__version) >= Version('2.16'): + if Version(self.__version) >= Version('2.17'): + self.__buildlabel = 'cuda12' + elif Version(self.__version) >= Version('2.16'): self.__buildlabel = 'cuda12-gdrcopy2-nccl2.18' elif Version(self.__version) >= Version('2.15'): self.__buildlabel = 'cuda12-gdrcopy2-nccl2.17' @@ -218,7 +221,9 @@ def __distro(self): elif hpccm.config.g_linux_distro == linux_distro.CENTOS: if not self.__oslabel: - if hpccm.config.g_linux_version >= Version('8.0'): + if hpccm.config.g_linux_version >= Version('9.0'): + self.__oslabel = 'redhat9' + elif hpccm.config.g_linux_version >= Version('8.0'): if Version(self.__version) >= Version('2.10'): self.__oslabel = 'redhat8' else: diff --git a/test/test_hpcx.py b/test/test_hpcx.py index a20e1dec..74a6cca9 100644 --- a/test/test_hpcx.py +++ b/test/test_hpcx.py @@ -31,29 +31,6 @@ def setUp(self): """Disable logging output messages""" logging.disable(logging.ERROR) - @x86_64 - @ubuntu18 - @docker - def test_defaults_ubuntu18(self): - """Default hpcx building block""" - h = hpcx() - self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.16 -RUN apt-get update -y && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - bzip2 \ - libnuma1 \ - openssh-client \ - tar \ - wget && \ - rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ - echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ - echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64''') - @x86_64 @ubuntu20 @docker @@ -61,7 +38,7 @@ def test_defaults_ubuntu20(self): """Default hpcx building block""" h = hpcx() self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.16 +r'''# Mellanox HPC-X version 2.19 RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bzip2 \ @@ -70,12 +47,12 @@ def test_defaults_ubuntu20(self): tar \ wget && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu20.04-cuda12-gdrcopy2-nccl2.18-x86_64''') + rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu20.04-cuda12-x86_64''') @x86_64 @centos @@ -84,7 +61,7 @@ def test_defaults_centos7(self): """Default mlnx_ofed building block""" h = hpcx() self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.16 +r'''# Mellanox HPC-X version 2.19 RUN yum install -y \ bzip2 \ numactl-libs \ @@ -92,12 +69,12 @@ def test_defaults_centos7(self): tar \ wget && \ rm -rf /var/cache/yum/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bashrc && \ echo "hpcx_load" >> /etc/bashrc && \ - rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat7-cuda12-gdrcopy2-nccl2.18-x86_64''') + rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat7-cuda12-x86_64''') @x86_64 @centos8 @@ -106,7 +83,7 @@ def test_defaults_centos8(self): """Default mlnx_ofed building block""" h = hpcx() self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.16 +r'''# Mellanox HPC-X version 2.19 RUN yum install -y \ bzip2 \ numactl-libs \ @@ -114,12 +91,12 @@ def test_defaults_centos8(self): tar \ wget && \ rm -rf /var/cache/yum/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bashrc && \ echo "hpcx_load" >> /etc/bashrc && \ - rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-redhat8-cuda12-gdrcopy2-nccl2.18-x86_64''') + rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-redhat8-cuda12-x86_64''') @x86_64 @ubuntu @@ -283,6 +260,29 @@ def test_ldconfig_multi_thread(self): PKG_CONFIG_PATH=/usr/local/hpcx/hcoll/lib/pkgconfig:/usr/local/hpcx/ompi/lib/pkgconfig:/usr/local/hpcx/sharp/lib/pkgconfig:/usr/local/hpcx/ucx/mt/lib/pkgconfig:$PKG_CONFIG_PATH \ SHMEM_HOME=/usr/local/hpcx/ompi''') + @x86_64 + @ubuntu18 + @docker + def test_version216_ubuntu18(self): + """Default hpcx building block""" + h = hpcx(version='2.16') + self.assertEqual(str(h), +r'''# Mellanox HPC-X version 2.16 +RUN apt-get update -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + bzip2 \ + libnuma1 \ + openssh-client \ + tar \ + wget && \ + rm -rf /var/lib/apt/lists/* +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ + echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ + echo "hpcx_load" >> /etc/bash.bashrc && \ + rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu18.04-cuda12-gdrcopy2-nccl2.18-x86_64''') + @x86_64 @ubuntu22 @docker @@ -291,7 +291,7 @@ def test_runtime(self): h = hpcx() r = h.runtime() self.assertEqual(r, -r'''# Mellanox HPC-X version 2.16 +r'''# Mellanox HPC-X version 2.19 RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bzip2 \ @@ -300,9 +300,9 @@ def test_runtime(self): tar \ wget && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.16/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.19/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64.tbz /var/tmp/hpcx-v2.16-gcc-mlnx_ofed-ubuntu22.04-cuda12-gdrcopy2-nccl2.18-x86_64''') + rm -rf /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.19-gcc-mlnx_ofed-ubuntu22.04-cuda12-x86_64''')