diff --git a/docs/mig.md b/docs/mig.md index b8eeae8ad..092629fed 100644 --- a/docs/mig.md +++ b/docs/mig.md @@ -214,10 +214,6 @@ openhpc_nodegroups: - conf: "gpu:nvidia_h100_80gb_hbm3:2" - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2" - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6" - -openhpc_config: - GresTypes: - - gpu ``` Making sure the types (the identifier after `gpu:`) match those collected with `slurmd -G`. Substrings diff --git a/environments/.caas/inventory/group_vars/all/openhpc.yml b/environments/.caas/inventory/group_vars/all/openhpc.yml index 56c8b907d..e3ad4c0e4 100644 --- a/environments/.caas/inventory/group_vars/all/openhpc.yml +++ b/environments/.caas/inventory/group_vars/all/openhpc.yml @@ -4,3 +4,6 @@ openhpc_cluster_name: "{{ cluster_name }}" # Provision a single "standard" compute nodegroup using the supplied # node count and flavor openhpc_nodegroups: "{{ hostvars[groups['openstack'][0]]['openhpc_nodegroups'] }}" + +# Enable autoconfiguration of NVIDIA GPUs, if using a suitable (`cuda`) image: +openhpc_gres_autodetect: nvml diff --git a/requirements.yml b/requirements.yml index 5ca864093..0d24cdf2d 100644 --- a/requirements.yml +++ b/requirements.yml @@ -4,7 +4,7 @@ roles: version: v25.3.2 name: stackhpc.nfs - src: https://github.com/stackhpc/ansible-role-openhpc.git - version: v1.4.1 + version: v1.5.0 name: stackhpc.openhpc - src: https://github.com/stackhpc/ansible-node-exporter.git version: stackhpc