From cdd50f2e3a810c4ec55abdb96150617a6f03b51e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 24 May 2024 13:24:34 +0000 Subject: [PATCH 01/13] create file-based inventory to permit multiple ansible commands --- ansible/roles/cluster_infra/defaults/main.yml | 1 + ansible/roles/cluster_infra/tasks/main.yml | 14 ++++++++ .../templates/cluster_hosts.yml.j2 | 32 +++++++++++++++++++ environments/.caas/hooks/pre.yml | 1 + 4 files changed, 48 insertions(+) create mode 100644 ansible/roles/cluster_infra/defaults/main.yml create mode 100644 ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 diff --git a/ansible/roles/cluster_infra/defaults/main.yml b/ansible/roles/cluster_infra/defaults/main.yml new file mode 100644 index 000000000..81830ddb7 --- /dev/null +++ b/ansible/roles/cluster_infra/defaults/main.yml @@ -0,0 +1 @@ +cluster_infra_inventory_file: "{{ appliances_environment_root }}/inventory/cluster_hosts.yml" diff --git a/ansible/roles/cluster_infra/tasks/main.yml b/ansible/roles/cluster_infra/tasks/main.yml index 5976b133c..06092b737 100644 --- a/ansible/roles/cluster_infra/tasks/main.yml +++ b/ansible/roles/cluster_infra/tasks/main.yml @@ -1,3 +1,11 @@ +- name: Ensure inventory hosts file does not exist + # This is a bit subtle; we're going to create this from the in-memory inventory to + # permit multiple ansible commands. 
Hosts cannot be deleted from inventory + # (even by a meta: refresh_inventory) so we must ensure inventory is empty + file: + path: "{{ cluster_infra_inventory_file }}" + state: absent + - debug: msg: | terraform_backend_type: {{ terraform_backend_type }} @@ -60,3 +68,9 @@ - name: Provision infrastructure include_role: name: stackhpc.terraform.infra + +- name: Create inventory hosts file to permit ad-hoc uses + template: + src: cluster_hosts.yml.j2 + dest: "{{ cluster_infra_inventory_file }}" + when: terraform_state != 'absent' diff --git a/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 b/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 new file mode 100644 index 000000000..f406600a0 --- /dev/null +++ b/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 @@ -0,0 +1,32 @@ +all: + vars: + # TODO: cluster_domain_suffix: + ansible_ssh_common_args: "{{ cluster_ssh_common_args }}" + ansible_ssh_private_key_file: "{{ cluster_ssh_private_key_file }}" +control: + hosts: +{% for control in groups['control'] %} + {{ control }}: + ansible_host: {{ hostvars[control].ansible_host }} + # TODO: instance_id: +{% endfor %} + +login: + hosts: +{% for login in groups['login'] %} + {{ login }}: + ansible_host: {{ hostvars[login].ansible_host }} + # TODO: instance_id: +{% endfor %} + +{{ cluster_name }}_standard: + hosts: +{% for compute in groups[cluster_name + '_standard'] %} + {{ compute }}: + ansible_host: {{ hostvars[compute].ansible_host }} + # TODO: instance_id: +{% endfor %} + +compute: + children: + {{ cluster_name }}_standard: diff --git a/environments/.caas/hooks/pre.yml b/environments/.caas/hooks/pre.yml index 05b0255c8..e8c025ce5 100644 --- a/environments/.caas/hooks/pre.yml +++ b/environments/.caas/hooks/pre.yml @@ -3,6 +3,7 @@ # Provision the infrastructure using Terraform - name: Provision infrastructure hosts: openstack + tags: infra roles: - cluster_infra From 399877c598311e34aa206c077cb223353600b760 Mon Sep 17 00:00:00 2001 From: Steve 
Brasier Date: Fri, 24 May 2024 15:37:45 +0000 Subject: [PATCH 02/13] add extravars for manual deployment/CI --- environments/.caas/.gitignore | 1 + environments/.caas/CI-extravars.yml | 47 +++++++++++++++++++++++++++++ environments/.caas/README.md | 14 +++++++-- environments/.caas/activate | 12 ++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 environments/.caas/.gitignore create mode 100644 environments/.caas/CI-extravars.yml create mode 100644 environments/.caas/activate diff --git a/environments/.caas/.gitignore b/environments/.caas/.gitignore new file mode 100644 index 000000000..53c95c9b9 --- /dev/null +++ b/environments/.caas/.gitignore @@ -0,0 +1 @@ +inventory/group_vars/all/CI-extravars.yml diff --git a/environments/.caas/CI-extravars.yml b/environments/.caas/CI-extravars.yml new file mode 100644 index 000000000..119a7bc8a --- /dev/null +++ b/environments/.caas/CI-extravars.yml @@ -0,0 +1,47 @@ +# Variables in this file are normally injected by Azimuth/CaaS. 
This file is moved into +# the inventory by the CI workflow or the activate script + +# -- Environment lookups -- +# Set either by the workflow for CI or by the activate script for dev/debugging +cluster_name: "{{ lookup('env', 'CI_CLUSTER_NAME') | default('dev', true) }}" # TODO: potentially want to override this for dev +distro_version: "{{ lookup('env', 'DISTRO_VERSION') | default('RL9', true) }}" # default(..., true): env lookup returns '' (not undefined) when unset +ci_cloud: "{{ lookup('env', 'CI_CLOUD') }}" # set by GH actions +# -- + +# Use tiny volumes for CI +state_volume_size: 10 +home_volume_size: 10 + +# Likely to have multiple (identical) images with same name in dev due to image build + community image deploys +cluster_image_name: + RL8: openhpc-RL8-240423-1002-4b09ba85 + RL9: openhpc-ofed-RL9-240423-1059-4b09ba85 +cluster_image: "{{ (lookup('pipe', 'openstack image list -f yaml') | from_yaml | selectattr('Name', 'eq', cluster_image_name[distro_version]) | first)['ID'] }}" + +compute_count: 2 +cluster_run_validation: true # enable hpctests +cluster_id: "{{ cluster_name }}" # TODO not sure what this should be +cluster_user_ssh_public_key: "{{ lookup('file', lookup('fileglob', '~/.ssh/*.pub') | split(',') | first) }}" + +ci_cloud_vars: # cloud-specific vars: + LEAFCLOUD: + control_flavor_name: ec1.medium # gets down to ~100Mi mem free on deployment + login_flavor_name: en1.xsmall + compute_flavor: en1.xsmall + cluster_external_network: external + use_home_volume_type_fast: true + home_volume_type_fast: unencrypted + # TODO: can't currently select state volume type as unencrypted + ARCUS: + control_flavor_name: vm.ska.cpu.general.eighth + login_flavor_name: vm.ska.cpu.general.small + compute_flavor: vm.ska.cpu.general.small + cluster_external_network: CUDN-Internet + use_home_volume_type_fast: false + +login_flavor_name: "{{ ci_cloud_vars[ci_cloud].login_flavor_name }}" +control_flavor_name: "{{ ci_cloud_vars[ci_cloud].control_flavor_name }}" +compute_flavor: "{{ ci_cloud_vars[ci_cloud].compute_flavor }}" +cluster_external_network: 
"{{ ci_cloud_vars[ci_cloud].cluster_external_network }}" +use_home_volume_type_fast: "{{ ci_cloud_vars[ci_cloud].use_home_volume_type_fast }}" +home_volume_type_fast: "{{ ci_cloud_vars[ci_cloud].home_volume_type_fast }}" diff --git a/environments/.caas/README.md b/environments/.caas/README.md index 4a08433b0..de2844b6e 100644 --- a/environments/.caas/README.md +++ b/environments/.caas/README.md @@ -1,9 +1,10 @@ # Caas cluster -Environment for default Azimuth Slurm. This is not intended to be manually deployed. +Environment for Azimuth CaaS Slurm. This is also used for CI and may be manually deployed +for debugging and development. It should *not* be used for a non-CaaS Slurm cluster. Non-standard things for this environment: -- There is no activate script. +- The `activate` script is provided *only* for development/debugging. - `ansible.cgf` is provided in the repo root, as expected by the caas operator. - `ANSIBLE_INVENTORY` is set in the cluster type template, using a path relative to the runner project directory: @@ -16,3 +17,12 @@ Non-standard things for this environment: Ansible then defines `ansible_inventory_sources` which contains absolute paths, and that is used to derive the `appliances_environment_root` and `appliances_repository_root`. + +It is also used for CI, and may be manually deployed for development/debugging as follows: + + . venv/bin/activate + . environments/.caas/activate # NB: CI_CLOUD may need changing + ansible-playbook ansible/site.yml #-e cluster_state=absent + +Once deployed or at least the `cluster_infra` role has finished, individual or ad-hoc +playbooks may be run as usual. 
diff --git a/environments/.caas/activate b/environments/.caas/activate new file mode 100644 index 000000000..93425921e --- /dev/null +++ b/environments/.caas/activate @@ -0,0 +1,12 @@ +export ANSIBLE_INVENTORY=environments/common/inventory,environments/.caas/inventory +echo "Set ANSIBLE_INVENTORY=${ANSIBLE_INVENTORY}" +export OS_CLOUD=openstack +echo "Set OS_CLOUD=${OS_CLOUD} - override if using a non-default clouds.yaml" +export CI_CLOUD=LEAFCLOUD +echo "Set CI_CLOUD=${CI_CLOUD} - override if on another cloud" +export DISTRO_VERSION=RL9 +echo "Set DISTRO_VERSION=${DISTRO_VERSION} - or override for RL8" +cp environments/.caas/CI-extravars.yml environments/.caas/inventory/group_vars/all/CI-extravars.yml +echo "Copied CI extravars into inventory" +export CI_CLUSTER_NAME=dev +echo "Set CI_CLUSTER_NAME=$CI_CLUSTER_NAME - override for multiple dev deployments" From 8f42e7cb5030cc170d3ff8f7f004938fa1e0e5e3 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 10:38:58 +0000 Subject: [PATCH 03/13] fix inventory hosts file logic for caas --- ansible/roles/cluster_infra/tasks/main.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ansible/roles/cluster_infra/tasks/main.yml b/ansible/roles/cluster_infra/tasks/main.yml index 06092b737..971bb8a87 100644 --- a/ansible/roles/cluster_infra/tasks/main.yml +++ b/ansible/roles/cluster_infra/tasks/main.yml @@ -1,10 +1,17 @@ -- name: Ensure inventory hosts file does not exist - # This is a bit subtle; we're going to create this from the in-memory inventory to - # permit multiple ansible commands. 
Hosts cannot be deleted from inventory - # (even by a meta: refresh_inventory) so we must ensure inventory is empty +- name: Remove inventory hosts file file: path: "{{ cluster_infra_inventory_file }}" state: absent + register: _inventory_file + +- name: Error if inventory hosts file exists + # When creating infra this file must not exist to avoid inventory errors from mismatches + # between this hosts file and in-memory inventory. Note hosts cannot be deleted from + # inventory so ensuring the file does not exist before starting the infra role is the + # only safe approach. + assert: + that: not _inventory_file.changed + fail_msg: "Inventory file has been deleted - rerun the ansible command now" - debug: msg: | From 6d414741b55e6e6858cd399d1eed9d67b9c1328e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 11:02:09 +0000 Subject: [PATCH 04/13] update check-slurm for caas --- ansible/ci/check_slurm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/ci/check_slurm.yml b/ansible/ci/check_slurm.yml index d95c5bb5c..4da8da747 100644 --- a/ansible/ci/check_slurm.yml +++ b/ansible/ci/check_slurm.yml @@ -21,4 +21,4 @@ vars: expected_sinfo: - - "{{ openhpc_cluster_name }}-compute-[0-1] standard* up 60-00:00:00 2 idle" + - "{{ openhpc_cluster_name }}-compute-standard-[0-1] standard* up 60-00:00:00 {{ compute_count }} idle" From 451287a36b17969b1bb63ff2f89b001e1d81ee79 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 13:47:36 +0000 Subject: [PATCH 05/13] let ci playbooks access secrets --- ansible/ci/check_grafana.yml | 4 ++-- ansible/ci/output_vars.yml | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ansible/ci/check_grafana.yml b/ansible/ci/check_grafana.yml index 216cb1ed9..3f4469e02 100644 --- a/ansible/ci/check_grafana.yml +++ b/ansible/ci/check_grafana.yml @@ -2,8 +2,8 @@ # Can't actually check the dashboard programatically so this queries the datasource used by the dashboard instead. 
- hosts: control # so proxying etc is irrelevant - gather_facts: no - become: no + gather_facts: yes + become: yes tasks: - name: Wait for slurm-stats file to exist (run by cron) ansible.builtin.wait_for: diff --git a/ansible/ci/output_vars.yml b/ansible/ci/output_vars.yml index 0e2bc4c61..b456349ad 100644 --- a/ansible/ci/output_vars.yml +++ b/ansible/ci/output_vars.yml @@ -1,12 +1,13 @@ # Output specific hostvars to a file in a form which can be sourced by bash # NB: obviously the keys and values for the hostvars need to be suitable bash variables - hosts: "{{ output_vars_hosts }}" - gather_facts: no + gather_facts: yes + become: yes tasks: - copy: dest: "{{ output_vars_path }}" content: | {% for item in output_vars_items.split(',') %} - export {{output_vars_prefix | default('') }}{{ item }}={{ lookup('vars', item) }} + export {{output_vars_prefix | default('') }}{{ item }}="{{ lookup('vars', item) }}" {% endfor %} delegate_to: localhost From 819b4d44b88f6c0fb930d1b668f844d39ee548f9 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 13:48:00 +0000 Subject: [PATCH 06/13] allow CI to access cluster --- ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 b/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 index f406600a0..8e7eba528 100644 --- a/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 +++ b/ansible/roles/cluster_infra/templates/cluster_hosts.yml.j2 @@ -1,6 +1,7 @@ all: vars: # TODO: cluster_domain_suffix: + cluster_gateway_ip: "{{ cluster_gateway_ip }}" ansible_ssh_common_args: "{{ cluster_ssh_common_args }}" ansible_ssh_private_key_file: "{{ cluster_ssh_private_key_file }}" control: From 9f8468c385246932ccd68ffacdcbd002cae88a47 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 13:48:34 +0000 Subject: [PATCH 07/13] fix accidental changes --- ansible/extras.yml | 3 +++ 
environments/.caas/hooks/pre.yml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/extras.yml b/ansible/extras.yml index 445a0cc16..bf09eff2d 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -5,6 +5,9 @@ - users gather_facts: yes tasks: + - debug: + var: basic_users_users.0 + - meta: end_here - import_role: name: basic_users diff --git a/environments/.caas/hooks/pre.yml b/environments/.caas/hooks/pre.yml index e8c025ce5..fe8474928 100644 --- a/environments/.caas/hooks/pre.yml +++ b/environments/.caas/hooks/pre.yml @@ -3,7 +3,7 @@ # Provision the infrastructure using Terraform - name: Provision infrastructure hosts: openstack - tags: infra + tags: cluster_infra roles: - cluster_infra From 3e37aacbb13cf8e404686b4a8de5e825a800611f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 13:49:27 +0000 Subject: [PATCH 08/13] update stackhpc workflow to use .caas --- .github/workflows/stackhpc.yml | 97 ++++++---------------------------- 1 file changed, 15 insertions(+), 82 deletions(-) diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index c8bb9b06f..050065123 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -31,8 +31,10 @@ jobs: env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - TF_VAR_cluster_name: slurmci-${{ matrix.os_version }}-${{ github.run_id }} + CI_CLUSTER_NAME: slurmci-${{ matrix.os_version }}-${{ github.run_id }} CI_CLOUD: ${{ vars.CI_CLOUD }} + ANSIBLE_INVENTORY: environments/common/inventory,environments/.caas/inventory + DISTRO_VERSION: ${{ matrix.os_version }} steps: - uses: actions/checkout@v2 @@ -40,115 +42,52 @@ jobs: run: | echo CI_CLOUD: ${{ vars.CI_CLOUD }} - - name: Setup ssh - run: | - set -x - mkdir ~/.ssh - echo "${{ secrets[format('{0}_SSH_KEY', vars.CI_CLOUD)] }}" > ~/.ssh/id_rsa - chmod 0600 ~/.ssh/id_rsa - shell: bash - - - name: Add bastion's ssh key to known_hosts - run: cat environments/.stackhpc/bastion_fingerprints >> 
~/.ssh/known_hosts - shell: bash - - name: Install ansible etc run: dev/setup-env.sh - - name: Install OpenTofu - uses: opentofu/setup-opentofu@v1 - with: - tofu_version: 1.6.2 - - - name: Initialise terraform - run: terraform init - working-directory: ${{ github.workspace }}/environments/.stackhpc/terraform - - name: Write clouds.yaml run: | mkdir -p ~/.config/openstack/ echo "${{ secrets[format('{0}_CLOUDS_YAML', vars.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml shell: bash - - name: Setup environment-specific inventory/terraform inputs - run: | - . venv/bin/activate - . environments/.stackhpc/activate - ansible-playbook ansible/adhoc/generate-passwords.yml - echo vault_testuser_password: "$TESTUSER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/all/test_user.yml - env: - TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }} - - - name: Provision nodes using fat image - id: provision_servers + - name: Setup CI-specific extravars (including per-cloud vars) run: | . venv/bin/activate - . environments/.stackhpc/activate - cd $APPLIANCES_ENVIRONMENT_ROOT/terraform - terraform apply -auto-approve -var-file="${{ vars.CI_CLOUD }}.tfvars" - env: - TF_VAR_os_version: ${{ matrix.os_version }} + cp environments/.caas/CI-extravars.yml environments/.caas/inventory/group_vars/all/CI-extravars.yml - - name: Delete infrastructure if provisioning failed + - name: Provision and configure cluster, run mpi-based tests and check slurm partitions run: | . venv/bin/activate - . environments/.stackhpc/activate - cd $APPLIANCES_ENVIRONMENT_ROOT/terraform - terraform destroy -auto-approve -var-file="${{ vars.CI_CLOUD }}.tfvars" - if: failure() && steps.provision_servers.outcome == 'failure' - env: - TF_VAR_os_version: ${{ matrix.os_version }} - - - name: Configure cluster - run: | - . venv/bin/activate - . 
environments/.stackhpc/activate - ansible all -m wait_for_connection ansible-playbook -v ansible/site.yml ansible-playbook -v ansible/ci/check_slurm.yml - - name: Run MPI-based tests - run: | - . venv/bin/activate - . environments/.stackhpc/activate - ansible-playbook -vv ansible/adhoc/hpctests.yml - # - name: Run EESSI tests # run: | # . venv/bin/activate - # . environments/.stackhpc/activate # ansible-playbook -vv ansible/ci/check_eessi.yml - - name: Confirm Open Ondemand is up (via SOCKS proxy) + - name: Confirm Open Ondemand is up (via FIP) run: | . venv/bin/activate - . environments/.stackhpc/activate # load ansible variables into shell: ansible-playbook ansible/ci/output_vars.yml \ - -e output_vars_hosts=openondemand \ + -e output_vars_hosts=control \ -e output_vars_path=$APPLIANCES_ENVIRONMENT_ROOT/vars.txt \ - -e output_vars_items=bastion_ip,bastion_user,openondemand_servername + -e output_vars_items=cluster_gateway_ip,vault_azimuth_user_password source $APPLIANCES_ENVIRONMENT_ROOT/vars.txt - # setup ssh proxying: - sudo apt-get --yes install proxychains - echo proxychains installed - ssh -v -fN -D 9050 ${bastion_user}@${bastion_ip} - echo port 9050 forwarded - # check OOD server returns 200: - statuscode=$(proxychains wget \ + statuscode=$(wget \ --quiet \ --spider \ --server-response \ --no-check-certificate \ - --http-user=testuser \ - --http-password=${TESTUSER_PASSWORD} https://${openondemand_servername} \ + --http-user=azimuth \ + --http-password=${vault_azimuth_user_password} https://${cluster_gateway_ip} \ 2>&1) (echo $statuscode | grep "200 OK") || (echo $statuscode && exit 1) - env: - TESTUSER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }} # - name: Build environment-specific compute image # id: packer_build @@ -171,33 +110,27 @@ jobs: - name: Test reimage of login and control nodes (via rebuild adhoc) run: | . venv/bin/activate - . 
environments/.stackhpc/activate ansible-playbook -v --limit control,login ansible/adhoc/rebuild.yml ansible all -m wait_for_connection -a 'delay=60 timeout=600' # delay allows node to go down + rm environments/.caas/inventory/cluster_hosts.yml ansible-playbook -v ansible/site.yml ansible-playbook -v ansible/ci/check_slurm.yml - name: Check sacct state survived reimage run: | . venv/bin/activate - . environments/.stackhpc/activate ansible-playbook -vv ansible/ci/check_sacct_hpctests.yml - name: Check MPI-based tests are shown in Grafana run: | . venv/bin/activate - . environments/.stackhpc/activate ansible-playbook -vv ansible/ci/check_grafana.yml - name: Delete infrastructure run: | . venv/bin/activate - . environments/.stackhpc/activate - cd $APPLIANCES_ENVIRONMENT_ROOT/terraform - terraform destroy -auto-approve -var-file="${{ vars.CI_CLOUD }}.tfvars" - if: ${{ success() || cancelled() }} - env: - TF_VAR_os_version: ${{ matrix.os_version }} + ansible-playbook -v ansible/site.yml -e cluster_state=absent + if: ${{ always() }} # success, failure or cancelled # - name: Delete images # run: | From 4548578aa0c241867f1b9946d756ca0f2cbc5f6a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 14:40:04 +0000 Subject: [PATCH 09/13] fix user ssh key lookup in caas --- environments/.caas/CI-extravars.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/.caas/CI-extravars.yml b/environments/.caas/CI-extravars.yml index 119a7bc8a..c31ced405 100644 --- a/environments/.caas/CI-extravars.yml +++ b/environments/.caas/CI-extravars.yml @@ -21,7 +21,7 @@ cluster_image: "{{ (lookup('pipe', 'openstack image list -f yaml') | from_yaml | compute_count: 2 cluster_run_validation: true # enable hpctests cluster_id: "{{ cluster_name }}" # TODO not sure what this should be -cluster_user_ssh_public_key: "{{ lookup('file', lookup('fileglob', '~/.ssh/*.pub') | split(',') | first) }}" +cluster_user_ssh_public_key: "{{ lookup('file', lookup('fileglob', 
'~/.ssh/*.pub', wantlist=True) | first) }}" ci_cloud_vars: # cloud-specific vars: LEAFCLOUD: From f6c2f6ce5cef9b37915feb0a4922ed18ff01af51 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 14:41:31 +0000 Subject: [PATCH 10/13] fix infra deletion in CI --- .github/workflows/stackhpc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index 050065123..0141273ee 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -129,6 +129,7 @@ jobs: - name: Delete infrastructure run: | . venv/bin/activate + rm environments/.caas/inventory/cluster_hosts.yml ansible-playbook -v ansible/site.yml -e cluster_state=absent if: ${{ always() }} # success, failure or cancelled From 78eaaaa1cf1fb274142143a9fb3877fe1f0b4b55 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 14:58:20 +0000 Subject: [PATCH 11/13] fix accidental changes --- ansible/extras.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ansible/extras.yml b/ansible/extras.yml index bf09eff2d..445a0cc16 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -5,9 +5,6 @@ - users gather_facts: yes tasks: - - debug: - var: basic_users_users.0 - - meta: end_here - import_role: name: basic_users From 39e6d2e6be9329fdd51fb08ea61be2c19cc46e7d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 15:03:28 +0000 Subject: [PATCH 12/13] fix infra deletion --- .github/workflows/stackhpc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index 0141273ee..c87da236f 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -129,7 +129,7 @@ jobs: - name: Delete infrastructure run: | . 
venv/bin/activate - rm environments/.caas/inventory/cluster_hosts.yml + rm -f environments/.caas/inventory/cluster_hosts.yml ansible-playbook -v ansible/site.yml -e cluster_state=absent if: ${{ always() }} # success, failure or cancelled From ea88ea568dde0d8a6852cd91b8b80f0ea8e05bc9 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 28 May 2024 16:10:47 +0000 Subject: [PATCH 13/13] setup ssh for azimuth user in CI --- .github/workflows/stackhpc.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index c87da236f..c60507aae 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -42,6 +42,9 @@ jobs: run: | echo CI_CLOUD: ${{ vars.CI_CLOUD }} + - name: Setup ssh for azimuth user + run: ssh-keygen -t rsa -q -f "$HOME/.ssh/id_rsa" -N "" + - name: Install ansible etc run: dev/setup-env.sh