diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0cb66f..c9f1a6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,6 @@ jobs: - test11 - test12 - test13 - - test14 exclude: # mariadb package provides /usr/bin/mysql on RL8 which doesn't work with geerlingguy/mysql role - scenario: test4 diff --git a/README.md b/README.md index 693556f..d463a20 100644 --- a/README.md +++ b/README.md @@ -50,32 +50,54 @@ each list element: ### slurm.conf -`openhpc_slurm_partitions`: Optional. List of one or more slurm partitions, default `[]`. Each partition may contain the following values: -* `groups`: If there are multiple node groups that make up the partition, a list of group objects can be defined here. - Otherwise, `groups` can be omitted and the following attributes can be defined in the partition object: - * `name`: The name of the nodes within this group. - * `cluster_name`: Optional. An override for the top-level definition `openhpc_cluster_name`. - * `extra_nodes`: Optional. A list of additional node definitions, e.g. for nodes in this group/partition not controlled by this role. Each item should be a dict, with keys/values as per the ["NODE CONFIGURATION"](https://slurm.schedmd.com/slurm.conf.html#lbAE) docs for slurm.conf. Note the key `NodeName` must be first. - * `ram_mb`: Optional. The physical RAM available in each node of this group ([slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `RealMemory`) in MiB. This is set using ansible facts if not defined, equivalent to `free --mebi` total * `openhpc_ram_multiplier`. - * `ram_multiplier`: Optional. An override for the top-level definition `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set. +`openhpc_nodegroups`: Optional, default `[]`. List of mappings, each defining a +unique set of homogeneous nodes: + * `name`: Required. Name of node group. + * `ram_mb`: Optional. The physical RAM available in each node of this group + ([slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `RealMemory`) + in MiB. This is set using ansible facts if not defined, equivalent to + `free --mebi` total * `openhpc_ram_multiplier`. + * `ram_multiplier`: Optional. An override for the top-level definition + `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set. * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). Each dict must define: - `conf`: A string with the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) but requiring the format `<name>:<type>:<number>`, e.g. `gpu:A100:2`. Note the `type` is an arbitrary string. - `file`: A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example. - Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) must be set in `openhpc_config` if this is used. - -* `default`: Optional. A boolean flag for whether this partion is the default. Valid settings are `YES` and `NO`. -* `maxtime`: Optional. A partition-specific time limit following the format of [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `MaxTime`. The default value is - given by `openhpc_job_maxtime`. The value should be quoted to avoid Ansible conversions. -* `partition_params`: Optional. Mapping of additional parameters and values for [partition configuration](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION).
- -For each group (if used) or partition any nodes in an ansible inventory group `<cluster_name>_<group_name>` will be added to the group/partition. Note that: -- Nodes may have arbitrary hostnames but these should be lowercase to avoid a mismatch between inventory and actual hostname. -- Nodes in a group are assumed to be homogenous in terms of processor and memory. -- An inventory group may be empty or missing, but if it is not then the play must contain at least one node from it (used to set processor information). - - -`openhpc_job_maxtime`: Maximum job time limit, default `'60-0'` (60 days). See [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `MaxTime` for format. The default is 60 days. The value should be quoted to avoid Ansible conversions. + * `features`: Optional. List of [Features](https://slurm.schedmd.com/slurm.conf.html#OPT_Features) strings. + * `node_params`: Optional. Mapping of additional parameters and values for + [node configuration](https://slurm.schedmd.com/slurm.conf.html#lbAE). + **NB:** Parameters which can be set via the keys above must not be included here. + + Each nodegroup will contain hosts from an Ansible inventory group named + `{{ openhpc_cluster_name }}_{{ name }}`, where `name` is the nodegroup name. + Note that: + - Each host may only appear in one nodegroup. + - Hosts in a nodegroup are assumed to be homogeneous in terms of processor and memory. + - Hosts may have arbitrary hostnames, but these should be lowercase to avoid a + mismatch between inventory and actual hostname. + - An inventory group may be missing or empty, in which case the node group + contains no hosts. + - If the inventory group is not empty, the play must contain at least one host from it. + This is used to set `Sockets`, `CoresPerSocket`, `ThreadsPerCore` and + optionally `RealMemory` for the nodegroup. + +`openhpc_partitions`: Optional. List of mappings, each defining a +partition. Each partition mapping may contain: + * `name`: Required. Name of partition. + * `nodegroups`: Optional. List of node group names. If omitted, the node group + with the same name as the partition is used. + * `default`: Optional. A boolean flag for whether this partition is the default. Valid settings are `YES` and `NO`. + * `maxtime`: Optional. A partition-specific time limit overriding `openhpc_job_maxtime`. + * `partition_params`: Optional. Mapping of additional parameters and values for + [partition configuration](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION). + **NB:** Parameters which can be set via the keys above must not be included here. + +If this variable is not set, one partition per nodegroup is created, with default +partition configuration for each. + +`openhpc_job_maxtime`: Maximum job time limit, default `'60-0'` (60 days), see +[slurm.conf:MaxTime](https://slurm.schedmd.com/slurm.conf.html#OPT_MaxTime). +**NB:** This should be quoted to avoid Ansible conversions. `openhpc_cluster_name`: name of the cluster. @@ -170,50 +192,121 @@ accessed (with facts gathering enabled) using `ansible_local.slurm`. As per the in mixed case are from config files. Note the facts are only refreshed when this role is run.
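As a minimal sketch of the naming convention described above (the cluster name `demo`, the `gpu` nodegroup, and all values below are hypothetical), hosts in an inventory group `demo_gpu` would form a `gpu` nodegroup:

```yaml
# Hypothetical example: hosts in inventory group "demo_gpu" become the "gpu" nodegroup
openhpc_cluster_name: demo
openhpc_nodegroups:
  - name: gpu
    features:
      - a100        # arbitrary Features string, illustrative only
    node_params:
      Weight: 10    # any other slurm.conf node-level parameter not covered by the keys above
openhpc_partitions:
  - name: gpu       # no `nodegroups` key, so the nodegroup with the same name is used
```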
-## Example Inventory +## Example -And an Ansible inventory as this: +### Simple - [openhpc_login] - openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos +The following creates a cluster with a single partition `compute` +containing two nodes: - [openhpc_compute] - openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos - openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos +```ini +# inventory/hosts: +[hpc_login] +cluster-login-0 - [cluster_login:children] - openhpc_login +[hpc_compute] +cluster-compute-0 +cluster-compute-1 - [cluster_control:children] - openhpc_login - - [cluster_batch:children] - openhpc_compute - -## Example Playbooks - -To deploy, create a playbook which looks like this: - - --- - - hosts: - - cluster_login - - cluster_control - - cluster_batch - become: yes - roles: - - role: openhpc - openhpc_enable: - control: "{{ inventory_hostname in groups['cluster_control'] }}" - batch: "{{ inventory_hostname in groups['cluster_batch'] }}" - runtime: true - openhpc_slurm_service_enabled: true - openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}" - openhpc_slurm_partitions: - - name: "compute" - openhpc_cluster_name: openhpc - openhpc_packages: [] - ... +[hpc_control] +cluster-control +``` +```yaml +# playbook.yml +--- +- hosts: all + become: yes + tasks: + - import_role: + name: stackhpc.openhpc + vars: + openhpc_cluster_name: hpc + openhpc_enable: + control: "{{ inventory_hostname in groups['hpc_control'] }}" + batch: "{{ inventory_hostname in groups['hpc_compute'] }}" + runtime: true + openhpc_slurm_control_host: "{{ groups['hpc_control'] | first }}" + openhpc_nodegroups: + - name: compute + openhpc_partitions: + - name: compute --- +``` + +### Multiple nodegroups + +This example shows how partitions can span multiple types of compute node. + +This example inventory describes three types of compute node (login and +control nodes are omitted for brevity): + +```ini +# inventory/hosts: +... +[hpc_general] +# standard compute nodes +cluster-general-0 +cluster-general-1 + +[hpc_large] +# large memory nodes +cluster-largemem-0 +cluster-largemem-1 + +[hpc_gpu] +# GPU nodes +cluster-a100-0 +cluster-a100-1 +... +``` + +First, `openhpc_nodegroups` is set to capture these inventory groups and +apply any node-level parameters - in this case the large memory nodes have +two cores reserved via `CoreSpecCount`, and GRES is configured for the GPU nodes: + +```yaml +openhpc_cluster_name: hpc +openhpc_nodegroups: + - name: general + - name: large + node_params: + CoreSpecCount: 2 + - name: gpu + gres: + - conf: gpu:A100:2 + file: /dev/nvidia[0-1] +``` + +Now two partitions can be configured - a default one with a short time limit and +no large memory nodes for testing jobs, and another with all the hardware and a +longer time limit for "production" jobs: + +```yaml +openhpc_partitions: + - name: test + nodegroups: + - general + - gpu + maxtime: '1:0:0' # 1 hour + default: 'YES' + - name: general + nodegroups: + - general + - large + - gpu + maxtime: '2-0' # 2 days + default: 'NO' +``` +Users select the partition using the `--partition` argument and request nodes +with appropriate memory or GPUs using the `--mem`, `--gres` or `--gpus*` +options for `sbatch` or `srun`. + +Finally, some additional configuration must be provided for GRES: +```yaml +openhpc_config: + GresTypes: + - gpu +``` 1 Slurm 20.11 removed `accounting_storage/filetxt` as an option.
This version of Slurm was introduced in OpenHPC v2.1 but the OpenHPC repos are common to all OpenHPC v2.x releases. [↩](#accounting_storage) diff --git a/defaults/main.yml b/defaults/main.yml index c465fa7..ea91c75 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -4,7 +4,8 @@ openhpc_slurm_service_started: "{{ openhpc_slurm_service_enabled }}" openhpc_slurm_service: openhpc_slurm_control_host: "{{ inventory_hostname }}" #openhpc_slurm_control_host_address: -openhpc_slurm_partitions: [] +openhpc_partitions: "{{ openhpc_nodegroups }}" +openhpc_nodegroups: [] openhpc_cluster_name: openhpc_packages: - slurm-libpmi-ohpc diff --git a/molecule/README.md b/molecule/README.md index c697f49..d96f669 100644 --- a/molecule/README.md +++ b/molecule/README.md @@ -10,7 +10,7 @@ test1 | 1 | N | 2x compute node, sequential na test1b | 1 | N | 1x compute node test1c | 1 | N | 2x compute nodes, nonsequential names test2 | 2 | N | 4x compute node, sequential names -test3 | 1 | Y | - +test3 | 1 | Y | 4x compute nodes in 2x groups, single partition test4 | 1 | N | 2x compute node, accounting enabled test5 | 1 | N | As for #1 but configless test6 | 1 | N | 0x compute nodes, configless @@ -21,7 +21,7 @@ test10 | 1 | N | As for #5 but then tries to ad test11 | 1 | N | As for #5 but then deletes a node (actually changes the partition due to molecule/ansible limitations) test12 | 1 | N | As for #5 but enabling job completion and testing `sacct -c` test13 | 1 | N | As for #5 but tests `openhpc_config` variable. -test14 | 1 | N | As for #5 but also tests `extra_nodes` via State=DOWN nodes. +test14 | 1 | N | [removed, extra_nodes removed] test15 | 1 | Y | As for #5 but also tests `partitions with different name but with the same NodeName`. diff --git a/molecule/test1/converge.yml b/molecule/test1/converge.yml index 0408415..c4b1c1e 100644 --- a/molecule/test1/converge.yml +++ b/molecule/test1/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc tasks: diff --git a/molecule/test10/converge.yml b/molecule/test10/converge.yml index aa862b4..a11f0b4 100644 --- a/molecule/test10/converge.yml +++ b/molecule/test10/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test10/verify.yml b/molecule/test10/verify.yml index 3102f92..612b05c 100644 --- a/molecule/test10/verify.yml +++ b/molecule/test10/verify.yml @@ -29,7 +29,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test11/converge.yml b/molecule/test11/converge.yml index d4be087..77d7342 100644 --- a/molecule/test11/converge.yml +++ b/molecule/test11/converge.yml @@ -11,7 +11,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute_orig" openhpc_cluster_name: testohpc 
openhpc_slurm_configless: true diff --git a/molecule/test11/verify.yml b/molecule/test11/verify.yml index 65cfbc8..71debb3 100644 --- a/molecule/test11/verify.yml +++ b/molecule/test11/verify.yml @@ -26,7 +26,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute_new" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test12/converge.yml b/molecule/test12/converge.yml index 1dc97b6..348460b 100644 --- a/molecule/test12/converge.yml +++ b/molecule/test12/converge.yml @@ -11,7 +11,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test13/converge.yml b/molecule/test13/converge.yml index 6cfe96c..5270c06 100644 --- a/molecule/test13/converge.yml +++ b/molecule/test13/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_control'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test14/converge.yml b/molecule/test14/converge.yml deleted file mode 100644 index f7db48c..0000000 --- a/molecule/test14/converge.yml +++ /dev/null @@ -1,29 +0,0 @@ ---- -- name: Converge - hosts: all - vars: - openhpc_enable: - control: "{{ inventory_hostname in groups['testohpc_login'] }}" - batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" - runtime: true - openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: - - name: "compute" - extra_nodes: - # Need to specify IPs for the non-existent State=DOWN nodes, because otherwise even in this state slurmctld will exclude a node with no lookup information from the config. - # We use invalid IPs here (i.e. starting 0.) to flag the fact the nodes shouldn't exist. - # Note this has to be done via slurm config rather than /etc/hosts due to Docker limitations on modifying the latter. 
- - NodeName: fake-x,fake-y - NodeAddr: 0.42.42.0,0.42.42.1 - State: DOWN - CPUs: 1 - - NodeName: fake-2cpu-[3,7-9] - NodeAddr: 0.42.42.3,0.42.42.7,0.42.42.8,0.42.42.9 - State: DOWN - CPUs: 2 - openhpc_cluster_name: testohpc - openhpc_slurm_configless: true - tasks: - - name: "Include ansible-role-openhpc" - include_role: - name: "{{ lookup('env', 'MOLECULE_PROJECT_DIRECTORY') | basename }}" diff --git a/molecule/test14/molecule.yml b/molecule/test14/molecule.yml deleted file mode 100644 index 29d30ca..0000000 --- a/molecule/test14/molecule.yml +++ /dev/null @@ -1,44 +0,0 @@ ---- -driver: - name: podman -platforms: - - name: testohpc-login-0 - image: ${MOLECULE_IMAGE} - pre_build_image: true - groups: - - testohpc_login - command: /sbin/init - tmpfs: - - /run - - /tmp - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - network: net1 - - name: testohpc-compute-0 - image: ${MOLECULE_IMAGE} - pre_build_image: true - groups: - - testohpc_compute - command: /sbin/init - tmpfs: - - /run - - /tmp - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - network: net1 - - name: testohpc-compute-1 - image: ${MOLECULE_IMAGE} - pre_build_image: true - groups: - - testohpc_compute - command: /sbin/init - tmpfs: - - /run - - /tmp - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - network: net1 -provisioner: - name: ansible -verifier: - name: ansible diff --git a/molecule/test14/verify.yml b/molecule/test14/verify.yml deleted file mode 100644 index c5e903d..0000000 --- a/molecule/test14/verify.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- - -- name: Check slurm hostlist - hosts: testohpc_login - tasks: - - name: Get slurm partition info - command: sinfo --noheader --format="%P,%a,%l,%D,%t,%N" # using --format ensures we control whitespace - register: sinfo - - name: - assert: # PARTITION AVAIL TIMELIMIT NODES STATE NODELIST - that: "sinfo.stdout_lines == ['compute*,up,60-00:00:00,6,down*,fake-2cpu-[3,7-9],fake-x,fake-y', 'compute*,up,60-00:00:00,2,idle,testohpc-compute-[0-1]']" - fail_msg: "FAILED - actual value: {{ sinfo.stdout_lines }}" diff --git a/molecule/test15/converge.yml b/molecule/test15/converge.yml index 7ec3109..28e496c 100644 --- a/molecule/test15/converge.yml +++ b/molecule/test15/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" partition_params: PreemptMode: requeue diff --git a/molecule/test1b/converge.yml b/molecule/test1b/converge.yml index 0408415..c4b1c1e 100644 --- a/molecule/test1b/converge.yml +++ b/molecule/test1b/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc tasks: diff --git a/molecule/test1c/converge.yml b/molecule/test1c/converge.yml index 341cd9e..9b2c8a9 100644 --- a/molecule/test1c/converge.yml +++ b/molecule/test1c/converge.yml @@ -8,7 +8,7 @@ runtime: true openhpc_slurm_service_enabled: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc tasks: diff --git a/molecule/test2/converge.yml b/molecule/test2/converge.yml index 1433682..000c5f3 100644 --- a/molecule/test2/converge.yml +++ b/molecule/test2/converge.yml @@ -7,7 +7,7 @@ batch: "{{ 
inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "part1" - name: "part2" openhpc_cluster_name: testohpc diff --git a/molecule/test3/converge.yml b/molecule/test3/converge.yml index 7805064..77f5cdb 100644 --- a/molecule/test3/converge.yml +++ b/molecule/test3/converge.yml @@ -7,15 +7,16 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: - - name: "compute" - groups: - - name: "grp1" - - name: "grp2" + openhpc_nodegroups: + - name: grp1 + - name: grp2 + openhpc_partitions: + - name: compute + nodegroups: + - grp1 + - grp2 openhpc_cluster_name: testohpc tasks: - name: "Include ansible-role-openhpc" include_role: name: "{{ lookup('env', 'MOLECULE_PROJECT_DIRECTORY') | basename }}" - - diff --git a/molecule/test4/converge.yml b/molecule/test4/converge.yml index 47c73bf..60e23e6 100644 --- a/molecule/test4/converge.yml +++ b/molecule/test4/converge.yml @@ -12,7 +12,7 @@ openhpc_slurmdbd_mysql_password: secure-password openhpc_slurmdbd_mysql_username: slurm openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_accounting_storage_client_package: mariadb diff --git a/molecule/test5/converge.yml b/molecule/test5/converge.yml index 0ac4e91..58a465d 100644 --- a/molecule/test5/converge.yml +++ b/molecule/test5/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test5/molecule.yml b/molecule/test5/molecule.yml index 29d30ca..4f7c357 100644 --- a/molecule/test5/molecule.yml +++ b/molecule/test5/molecule.yml @@ -9,8 +9,8 @@ platforms: - testohpc_login command: /sbin/init tmpfs: - - /run - - /tmp + /run: rw + /tmp: rw volumes: - /sys/fs/cgroup:/sys/fs/cgroup:ro network: net1 @@ -21,8 +21,8 @@ platforms: - testohpc_compute command: /sbin/init tmpfs: - - /run - - /tmp + /run: rw + /tmp: rw volumes: - /sys/fs/cgroup:/sys/fs/cgroup:ro network: net1 @@ -33,8 +33,8 @@ platforms: - testohpc_compute command: /sbin/init tmpfs: - - /run - - /tmp + /run: rw + /tmp: rw volumes: - /sys/fs/cgroup:/sys/fs/cgroup:ro network: net1 diff --git a/molecule/test6/converge.yml b/molecule/test6/converge.yml index 52d6d50..8904633 100644 --- a/molecule/test6/converge.yml +++ b/molecule/test6/converge.yml @@ -6,7 +6,7 @@ control: "{{ inventory_hostname in groups['testohpc_login'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_login'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "n/a" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/molecule/test8/converge.yml b/molecule/test8/converge.yml index 33a8e23..4aad845 100644 --- a/molecule/test8/converge.yml +++ b/molecule/test8/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_control'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git 
a/molecule/test9/converge.yml b/molecule/test9/converge.yml index 33a8e23..4aad845 100644 --- a/molecule/test9/converge.yml +++ b/molecule/test9/converge.yml @@ -7,7 +7,7 @@ batch: "{{ inventory_hostname in groups['testohpc_compute'] }}" runtime: true openhpc_slurm_control_host: "{{ groups['testohpc_control'] | first }}" - openhpc_slurm_partitions: + openhpc_nodegroups: - name: "compute" openhpc_cluster_name: testohpc openhpc_slurm_configless: true diff --git a/tasks/main.yml b/tasks/main.yml index 1ec95d0..bd10aaa 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -1,5 +1,11 @@ --- +- name: Validate configuration + block: + - include_tasks: validate.yml + when: openhpc_enable.runtime | default(false) | bool + tags: install + - name: Install packages block: - include_tasks: install.yml diff --git a/tasks/runtime.yml b/tasks/runtime.yml index e1881d3..a19aa89 100644 --- a/tasks/runtime.yml +++ b/tasks/runtime.yml @@ -2,15 +2,6 @@ - include_tasks: pre.yml -- name: Check openhpc_slurm_control_host, openhpc_cluster_name or openhpc_slurm_partitions exist - assert: - that: - - openhpc_slurm_control_host is defined - - openhpc_cluster_name is defined - - openhpc_cluster_name != '' - - openhpc_slurm_partitions is defined - fail_msg: "Undefined openhpc_slurm_control_host, openhpc_cluster_name or openhpc_slurm_partitions." - - name: Fail if control host not in play and munge key not specified fail: msg: "Either the slurm control node must be in the play or `openhpc_munge_key` must be set" diff --git a/tasks/validate.yml b/tasks/validate.yml new file mode 100644 index 0000000..0ed2409 --- /dev/null +++ b/tasks/validate.yml @@ -0,0 +1,29 @@ +- name: Check openhpc_slurm_control_host and openhpc_cluster_name + assert: + that: + - openhpc_slurm_control_host is defined + - openhpc_slurm_control_host != '' + - openhpc_cluster_name is defined + - openhpc_cluster_name != '' + fail_msg: openhpc role variables not correctly defined, see detail above + delegate_to: localhost + run_once: true + +- name: Check no host appears in more than one nodegroup + assert: + that: "{{ _openhpc_check_hosts.values() | select('greaterthan', 1) | length == 0 }}" + fail_msg: | + Some hosts appear more than once in inventory groups {{ _openhpc_node_inventory_groups | join(', ') }}: + {{ _openhpc_check_hosts | dict2items | rejectattr('value', 'equalto', 1) | items2dict | to_nice_yaml }} + vars: + _openhpc_node_inventory_groups: "{{ openhpc_nodegroups | map(attribute='name') | map('regex_replace', '^', openhpc_cluster_name ~ '_') }}" + _openhpc_check_hosts: "{{ groups | dict2items | list | selectattr('key', 'in', _openhpc_node_inventory_groups) | map(attribute='value') | flatten | community.general.counter }}" + delegate_to: localhost + run_once: true + +- name: Fail if configuration is old + assert: + that: openhpc_slurm_partitions is not defined + fail_msg: stackhpc.openhpc parameter openhpc_slurm_partitions has been replaced - see openhpc_nodegroups and openhpc_partitions + delegate_to: localhost + run_once: true diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index a6fa27b..bc23ed5 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -1,16 +1,11 @@ AutoDetect=off -{% for part in openhpc_slurm_partitions %} -{% set nodelist = [] %} -{% for group in part.get('groups', [part]) %} -{% if 'gres' in group %} -{% for gres in group.gres %} -{% set gres_name, gres_type, _ = gres.conf.split(':') %} -{% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %} -{% set 
inventory_group_hosts = groups.get(group_name, []) %} -{% for hostlist in (inventory_group_hosts | hostlist_expression) %} +{% for nodegroup in openhpc_nodegroups %} +{% for gres in nodegroup.gres | default([]) %} +{% set gres_name, gres_type, _ = gres.conf.split(':') %} +{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} +{% set inventory_group_hosts = groups.get(inventory_group_name, []) %} +{% for hostlist in (inventory_group_hosts | hostlist_expression) %} NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file }} -{% endfor %} -{% endfor %} -{% endif %} -{% endfor %} -{% endfor %} +{% endfor %}{# hostlists #} +{% endfor %}{# gres #} +{% endfor %}{# nodegroup #} diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2 index 94f0465..3d29994 100644 --- a/templates/slurm.conf.j2 +++ b/templates/slurm.conf.j2 @@ -135,9 +135,6 @@ SlurmdSyslogDebug=info #SlurmSchedLogFile= #SlurmSchedLogLevel= #DebugFlags= -# -# -# POWER SAVE SUPPORT FOR IDLE NODES - NOT SUPPORTED IN THIS APPLIANCE VERSION # LOGIN-ONLY NODES # Define slurmd nodes not in partitions for login-only nodes in "configless" mode: @@ -145,45 +142,49 @@ SlurmdSyslogDebug=info NodeName={{ node }} {% endfor %}{% endif %} -# COMPUTE NODES -# OpenHPC default configuration PropagateResourceLimitsExcept=MEMLOCK Epilog=/etc/slurm/slurm.epilog.clean -{% set donehosts = [] %} -{% for part in openhpc_slurm_partitions %} - {% set nodelist = [] %} - {% for group in part.get('groups', [part]) %} - {% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %} -# openhpc_slurm_partitions group: {{ group_name }} - {% set inventory_group_hosts = groups.get(group_name, []) %} - {% if inventory_group_hosts | length > 0 %} - {% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %} - {% set first_host = play_group_hosts | first | mandatory('Group "' ~ group_name ~ '" contains no hosts in this play - was --limit used?') %} - {% set first_host_hv = hostvars[first_host] %} - {% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (group.ram_multiplier | default(openhpc_ram_multiplier))) | int %} - {% for hostlist in (inventory_group_hosts | hostlist_expression) %} - {% set gres = ' Gres=%s' % (','.join(group.gres | map(attribute='conf') )) if 'gres' in group else '' %} - {% if hostlist not in donehosts %} -NodeName={{ hostlist }} State=UNKNOWN RealMemory={{ group.get('ram_mb', ram_mb) }} Sockets={{first_host_hv['ansible_processor_count']}} CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }}{{ gres }} - {% endif %} - {% set _ = nodelist.append(hostlist) %} - {% set _ = donehosts.append(hostlist) %} - {% endfor %}{# nodes #} - {% endif %}{# inventory_group_hosts #} - {% for extra_node_defn in group.get('extra_nodes', []) %} -{{ extra_node_defn.items() | map('join', '=') | join(' ') }} - {% set _ = nodelist.append(extra_node_defn['NodeName']) %} - {% endfor %} - {% endfor %}{# group #} -{% if not nodelist %}{# empty partition #} -{% set nodelist = ['""'] %} -{% endif %} -PartitionName={{part.name}} Default={{ part.get('default', 'YES') }} MaxTime={{ part.get('maxtime', openhpc_job_maxtime) }} State=UP Nodes={{ nodelist | join(',') }} {{ part.partition_params | default({}) | dict2parameters }} -{% endfor %}{# partitions #} + +# COMPUTE NODES +{% for nodegroup in openhpc_nodegroups %} +# nodegroup: {{ nodegroup.name }} +{% set inventory_group_name = 
openhpc_cluster_name ~ '_' ~ nodegroup.name %} +{% set inventory_group_hosts = groups.get(inventory_group_name, []) %} +{% if inventory_group_hosts | length > 0 %} +{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %} +{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %} +{% set first_host_hv = hostvars[first_host] %} +{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %} +{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #} +NodeName={{ hostlists | join(',') }} {{ '' -}} + Features={{ (['nodegroup_' ~ nodegroup.name] + nodegroup.features | default([]) ) | join(',') }} {{ '' -}} + State=UNKNOWN {{ '' -}} + RealMemory={{ nodegroup.ram_mb | default(ram_mb) }} {{ '' -}} + Sockets={{ first_host_hv['ansible_processor_count'] }} {{ '' -}} + CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} {{ '' -}} + ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }} {{ '' -}} + {{ nodegroup.node_params | default({}) | dict2parameters }} {{ '' -}} + {% if 'gres' in nodegroup %}Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) }}{% endif %} + +{% endif %}{# 1 or more hosts in inventory #} +NodeSet=nodegroup_{{ nodegroup.name }} Feature=nodegroup_{{ nodegroup.name }} + +{% endfor %} # Define a non-existent node, in no partition, so that slurmctld starts even with all partitions empty NodeName=nonesuch +# PARTITIONS +{% for partition in openhpc_partitions %} +PartitionName={{partition.name}} {{ '' -}} + Default={{ partition.get('default', 'YES') }} {{ '' -}} + MaxTime={{ partition.get('maxtime', openhpc_job_maxtime) }} {{ '' -}} + State=UP {{ '' -}} + Nodes={{ partition.get('nodegroups', [partition.name]) | map('regex_replace', '^', 'nodegroup_') | join(',') }} {{ '' -}} + {{ partition.partition_params | default({}) | dict2parameters }} +{% endfor %}{# openhpc_partitions #} + {% if openhpc_slurm_configless | bool %}SlurmctldParameters=enable_configless{% endif %} + ReturnToService=2
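As a rough illustration of this template's output, a hypothetical `general` nodegroup containing two hosts (with illustrative memory and CPU facts) and a default partition of the same name would render approximately as:

```ini
# Sketch of rendered slurm.conf lines (hypothetical hosts and values, not from a real run)
NodeName=cluster-general-[0-1] Features=nodegroup_general State=UNKNOWN RealMemory=30000 Sockets=1 CoresPerSocket=8 ThreadsPerCore=2
NodeSet=nodegroup_general Feature=nodegroup_general
PartitionName=general Default=YES MaxTime=60-0 State=UP Nodes=nodegroup_general
```

Partitions therefore reference nodesets rather than explicit node lists, which is what allows a single nodegroup to appear in several partitions.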