diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index cf23fa526..4073b9657 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -58,9 +58,9 @@ name: mysql tasks_from: install.yml - name: OpenHPC - import_role: + include_role: name: stackhpc.openhpc - tasks_from: install.yml + tasks_from: "install-{{ openhpc_install_type }}.yml" - name: Include distribution variables for osc.ood include_vars: "{{ appliances_repository_root }}/ansible/roles/osc.ood/vars/Rocky/8.yml" diff --git a/ansible/roles/hpctests/README.md b/ansible/roles/hpctests/README.md index ee37791ec..c6a28d26b 100644 --- a/ansible/roles/hpctests/README.md +++ b/ansible/roles/hpctests/README.md @@ -38,6 +38,8 @@ The following variables should not generally be changed: - `hpctests_pingpong_plot`: Whether to plot pingpong results. Default `yes`. - `hpctests_hpl_modules`: As above but for hpl tests. - `hpctests_hpl_version`: Version of HPL +- `hpctests_extra_paths`: List of additional paths to add to $PATH in `pingpong` and `pingmatrix` sbatch scripts. +- `hpctests_pingpong_command`: Command to use to run IMB-MPI1 pingpong. 
Dependencies ------------ diff --git a/ansible/roles/hpctests/defaults/main.yml b/ansible/roles/hpctests/defaults/main.yml index 280fd454e..08831b7c2 100644 --- a/ansible/roles/hpctests/defaults/main.yml +++ b/ansible/roles/hpctests/defaults/main.yml @@ -1,7 +1,9 @@ --- hpctests_rootdir: +hpctests_extra_paths: [] hpctests_pingmatrix_modules: [gnu12 openmpi4] hpctests_pingpong_modules: [gnu12 openmpi4 imb] +hpctests_pingpong_command: 'mpirun IMB-MPI1 pingpong' # NB 'srun --mpi=pmi2 IMB-MPI1 pingpong' doesn't work in ohpc v2.1 hpctests_pingpong_plot: yes hpctests_hpl_modules: [gnu12 openmpi4 openblas] hpctests_outdir: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/hpctests" diff --git a/ansible/roles/hpctests/templates/pingmatrix.sh.j2 b/ansible/roles/hpctests/templates/pingmatrix.sh.j2 index 17fb3fd6a..26b4de6c2 100644 --- a/ansible/roles/hpctests/templates/pingmatrix.sh.j2 +++ b/ansible/roles/hpctests/templates/pingmatrix.sh.j2 @@ -12,7 +12,8 @@ export UCX_NET_DEVICES={{ hpctests_ucx_net_devices }} echo SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST echo SLURM_JOB_ID: $SLURM_JOB_ID echo UCX_NET_DEVICES: $UCX_NET_DEVICES -module load {{ hpctests_pingmatrix_modules | join(' ' ) }} +{% if hpctests_pingmatrix_modules %}module load {{ hpctests_pingmatrix_modules | join(' ' ) }}{% endif %} +{% if hpctests_extra_paths %}export PATH={{ hpctests_extra_paths | join(':') }}:$PATH{% endif %} mpicc -o nxnlatbw mpi_nxnlatbw.c mpirun nxnlatbw diff --git a/ansible/roles/hpctests/templates/pingpong.sh.j2 b/ansible/roles/hpctests/templates/pingpong.sh.j2 index e74e52539..ae7baa45a 100644 --- a/ansible/roles/hpctests/templates/pingpong.sh.j2 +++ b/ansible/roles/hpctests/templates/pingpong.sh.j2 @@ -12,7 +12,7 @@ export UCX_NET_DEVICES={{ hpctests_ucx_net_devices }} echo SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST echo SLURM_JOB_ID: $SLURM_JOB_ID echo UCX_NET_DEVICES: $UCX_NET_DEVICES -module load {{ hpctests_pingpong_modules | join(' ' ) }} +{% if hpctests_pingpong_modules %}module load {{ 
hpctests_pingpong_modules | join(' ' ) }}{% endif %} +{% if hpctests_extra_paths %}export PATH={{ hpctests_extra_paths | join(':') }}:$PATH{% endif %} -#srun --mpi=pmi2 IMB-MPI1 pingpong # doesn't work in ohpc v2.1 -mpirun IMB-MPI1 pingpong +{{ hpctests_pingpong_command }} diff --git a/ansible/slurm.yml b/ansible/slurm.yml index 080c74dcb..4144db5f4 100644 --- a/ansible/slurm.yml +++ b/ansible/slurm.yml @@ -25,8 +25,14 @@ tags: - openhpc tasks: - - import_role: + - include_role: + name: stackhpc.openhpc + tasks_from: "install-{{ openhpc_install_type }}.yml" + tags: install + - include_role: name: stackhpc.openhpc + tasks_from: runtime.yml + tags: runtime - name: Set locked memory limits on user-facing nodes hosts: diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index 0ab3be5ee..59e27f11d 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,8 +13,8 @@ variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-240116-1156-aa8dba7d" # https://github.com/stackhpc/ansible-slurm-appliance/pull/351 - # default = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2" + # default = "openhpc-240116-1156-aa8dba7d" # https://github.com/stackhpc/ansible-slurm-appliance/pull/351 + default = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2" } variable "cluster_net" {} @@ -62,8 +62,6 @@ module "cluster" { compute_nodes = { compute-0: "small" compute-1: "small" - compute-2: "extra" - compute-3: "extra" } volume_backed_instances = var.volume_backed_instances diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index 1cb963657..d3f4964af 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -2,7 +2,7 @@ # See: 
https://github.com/stackhpc/ansible-role-openhpc # for variable definitions - +openhpc_install_type: ohpc # use "ohpc" for an OpenHPC-based system or "generic" if providing binaries openhpc_enable: control: "{{ inventory_hostname in groups['control'] }}" batch: "{{ inventory_hostname in groups['compute'] }}" diff --git a/requirements.yml b/requirements.yml index 3587966aa..59e717c67 100644 --- a/requirements.yml +++ b/requirements.yml @@ -3,7 +3,7 @@ roles: - src: stackhpc.nfs version: v23.12.1 # Tolerate state nfs file handles - src: https://github.com/stackhpc/ansible-role-openhpc.git - version: v0.23.0 # https://github.com/stackhpc/ansible-role-openhpc/pull/165 + version: feat/no-ohpc # https://github.com/stackhpc/ansible-role-openhpc/pull/162 name: stackhpc.openhpc - src: https://github.com/stackhpc/ansible-node-exporter.git version: stackhpc