diff --git a/README.md b/README.md
index 022c9ab..841c15b 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ each list element:
 
 `openhpc_slurm_configless`: Optional, default false. If true then slurm's ["configless" mode](https://slurm.schedmd.com/configless_slurm.html) is used.
 
-`openhpc_munge_key`: Optional. Define a munge key to use. If not provided then one is generated but the `openhpc_slurm_control_host` must be in the play.
+`openhpc_munge_key_b64`: Optional. A base-64 encoded munge key. If not provided then the one generated on package install is used, but the `openhpc_slurm_control_host` must be in the play.
 
 `openhpc_login_only_nodes`: Optional. If using "configless" mode specify the name of an ansible group containing nodes which are login-only nodes (i.e. not also control nodes), if required. These nodes will run `slurmd` to contact the control node for config.
 
diff --git a/defaults/main.yml b/defaults/main.yml
index 94ba868..bb06672 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -125,7 +125,7 @@ ohpc_default_extra_repos:
 # Concatenate all repo definitions here
 ohpc_repos: "{{ ohpc_openhpc_repos[ansible_distribution_major_version] + ohpc_default_extra_repos[ansible_distribution_major_version] + openhpc_extra_repos }}"
 
-openhpc_munge_key:
+openhpc_munge_key_b64:
 openhpc_login_only_nodes: ''
 openhpc_module_system_install: true
 
diff --git a/tasks/runtime.yml b/tasks/runtime.yml
index b08a451..0ba2b12 100644
--- a/tasks/runtime.yml
+++ b/tasks/runtime.yml
@@ -18,31 +18,17 @@
     state: directory
   when: inventory_hostname == openhpc_slurm_control_host
 
-- name: Generate a Munge key on control host
-  # NB this is usually a no-op as the package install actually generates a (node-unique) one, so won't usually trigger handler
-  command: "dd if=/dev/urandom of=/etc/munge/munge.key bs=1 count=1024"
-  args:
-    creates: "/etc/munge/munge.key"
-  when: inventory_hostname == openhpc_slurm_control_host
-
 - name: Retrieve Munge key from control host
+  # NB: package install generates a node-unique key; the control host's copy is read here for use by the "Write Munge key" task
   slurp:
     src: "/etc/munge/munge.key"
   register: openhpc_control_munge_key
   delegate_to: "{{ openhpc_slurm_control_host }}"
   when: openhpc_slurm_control_host in ansible_play_hosts
 
-- name: Fix permissions on /etc to pass Munge startup checks
-  # Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2 makes /etc g=rwx rather than g=rx (where group=root)
-  # which fails munged startup checks
-  file:
-    path: /etc
-    state: directory
-    mode: g-w
-
 - name: Write Munge key
   copy:
-    content: "{{ openhpc_munge_key or (openhpc_control_munge_key.content | b64decode) }}"
+    content: "{{ (openhpc_munge_key_b64 or openhpc_control_munge_key.content) | b64decode }}"
     dest: "/etc/munge/munge.key"
     owner: munge
     group: munge
diff --git a/tasks/validate.yml b/tasks/validate.yml
index b1bc104..0a58c99 100644
--- a/tasks/validate.yml
+++ b/tasks/validate.yml
@@ -40,9 +40,16 @@
   loop: "{{ _openhpc_gres_autodetect_groups }}"
   run_once: true
 
-- name: Fail if configuration is old
+- name: Fail if partition configuration is outdated
   assert:
     that: openhpc_slurm_partitions is not defined
     fail_msg: stackhpc.openhpc parameter openhpc_slurm_partitions has been replaced - see openhpc_nodegroups and openhpc_partitions
   delegate_to: localhost
   run_once: true
+
+- name: Fail if munge key configuration is outdated
+  assert:
+    that: openhpc_munge_key is not defined
+    fail_msg: stackhpc.openhpc parameter openhpc_munge_key has been replaced with openhpc_munge_key_b64
+  delegate_to: localhost
+  run_once: true