diff --git a/README.md b/README.md index 953bdec..53aa989 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,11 @@ Every Ansible deployment automatically deploys an observability stack alongside - `tmp/local-run-DD-MM-YYYY-HH-MM.log` for local deployments - `tmp/ansible-run-DD-MM-YYYY-HH-MM.log` for Ansible deployments - Example: `NETWORK_DIR=local-devnet ./spin-node.sh --node all --logs` +19. `--network` sets the network name label attached to every metric and log stream scraped by the observability stack (Ansible mode only). + - Default: `devnet-3`, set in `parse-env.sh` after argument parsing + - Propagated to Ansible as the `network_name` variable, which is used in `prometheus.yml.j2` and `promtail.yml.j2` templates + - Appears as the `network` label on all Prometheus scrape targets (app, node_exporter, cadvisor) and all Promtail log streams, so you can filter by network in Grafana across multiple environments + - Example: `--network devnet-x` ### Preparing remote servers diff --git a/ansible/playbooks/clean-node-data.yml b/ansible/playbooks/clean-node-data.yml index a583782..adfbe0c 100644 --- a/ansible/playbooks/clean-node-data.yml +++ b/ansible/playbooks/clean-node-data.yml @@ -6,7 +6,7 @@ connection: local gather_facts: no vars: - validator_config_file: "{{ genesis_dir }}/validator-config.yaml" + validator_config_file: "{{ local_validator_config_path | default(genesis_dir + '/validator-config.yaml') }}" tags: - zeam - ream diff --git a/ansible/playbooks/deploy-nodes.yml b/ansible/playbooks/deploy-nodes.yml index 1c1ae9a..8f24766 100644 --- a/ansible/playbooks/deploy-nodes.yml +++ b/ansible/playbooks/deploy-nodes.yml @@ -18,7 +18,7 @@ - deploy - observability vars: - validator_config_file: "{{ genesis_dir }}/validator-config.yaml" + validator_config_file: "{{ local_validator_config_path | default(genesis_dir + '/validator-config.yaml') }}" tasks: - name: Validate validator-config.yaml exists @@ -122,10 +122,7 @@ - name: Sync validator-config.yaml to remote host copy: - # Use the expanded subnet config when --subnets was specified; fall back - # to the standard validator-config.yaml otherwise. The destination is - # always validator-config.yaml so client roles don't need to change. - src: "{{ local_genesis_dir }}/{{ validator_config_basename | default('validator-config.yaml') }}" + src: "{{ local_validator_config_path | default(local_genesis_dir + '/validator-config.yaml') }}" dest: "{{ genesis_dir }}/validator-config.yaml" mode: '0644' force: yes diff --git a/ansible/playbooks/generate-genesis.yml b/ansible/playbooks/generate-genesis.yml index 7006d9c..e2ee5e4 100644 --- a/ansible/playbooks/generate-genesis.yml +++ b/ansible/playbooks/generate-genesis.yml @@ -34,9 +34,19 @@ set_fact: project_root: "{{ project_root_result.stdout }}" + - name: Copy custom validator config to genesis dir if different + copy: + src: "{{ local_validator_config_path }}" + dest: "{{ genesis_dir }}/validator-config.yaml" + mode: '0644' + when: + - local_validator_config_path is defined + - local_validator_config_path != genesis_dir + '/validator-config.yaml' + - name: Run generate-genesis.sh script shell: | - cd "{{ project_root }}" && ./generate-genesis.sh "{{ genesis_dir }}" --mode ansible --offset {{ genesis_time_offset }} + cd "{{ project_root }}" && ./generate-genesis.sh "{{ genesis_dir }}" --mode ansible --offset {{ genesis_time_offset }}{% if local_validator_config_path is defined and local_validator_config_path != '' %} --validator-config "{{ local_validator_config_path }}"{% endif %} + register: genesis_result args: executable: /bin/bash @@ -55,7 +65,7 @@ - name: Extract node names from validator-config.yaml shell: | - yq eval '.validators[].name' "{{ genesis_dir }}/validator-config.yaml" + yq eval '.validators[].name' "{{ local_validator_config_path | default(genesis_dir + '/validator-config.yaml') }}" register: node_names_raw changed_when: false diff --git a/ansible/playbooks/helpers/deploy-single-node.yml b/ansible/playbooks/helpers/deploy-single-node.yml index 97c701b..e633c20 100644 --- a/ansible/playbooks/helpers/deploy-single-node.yml +++ b/ansible/playbooks/helpers/deploy-single-node.yml @@ -9,7 +9,7 @@ - name: Set validator config file paths set_fact: actual_validator_config_file: "{{ genesis_dir }}/validator-config.yaml" - local_validator_config_file: "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + local_validator_config_file: "{{ local_validator_config_path }}" - name: Extract node configuration (from local config) shell: | diff --git a/ansible/playbooks/prepare.yml b/ansible/playbooks/prepare.yml index 03ee3ce..10d5d3d 100644 --- a/ansible/playbooks/prepare.yml +++ b/ansible/playbooks/prepare.yml @@ -224,7 +224,7 @@ - name: Read all node entries for this host from the active validator config vars: - _vc_file: "{{ _genesis_dir + '/' + (validator_config_basename | default('validator-config.yaml')) }}" + _vc_file: "{{ local_validator_config_path | default(_genesis_dir + '/validator-config.yaml') }}" _vc: "{{ lookup('file', _vc_file) | from_yaml }}" _entries: "{{ _vc.validators | selectattr('enrFields.ip', 'equalto', ansible_host) | list }}" set_fact: diff --git a/ansible/playbooks/stop-nodes.yml b/ansible/playbooks/stop-nodes.yml index 6d96bc3..8c6c880 100644 --- a/ansible/playbooks/stop-nodes.yml +++ b/ansible/playbooks/stop-nodes.yml @@ -7,7 +7,7 @@ connection: local gather_facts: yes vars: - validator_config_file: "{{ genesis_dir }}/validator-config.yaml" + validator_config_file: "{{ local_validator_config_path | default(genesis_dir + '/validator-config.yaml') }}" tasks: - name: Validate validator-config.yaml exists diff --git a/ansible/roles/ethlambda/tasks/main.yml b/ansible/roles/ethlambda/tasks/main.yml index fc68d54..f8b467f 100644 --- a/ansible/roles/ethlambda/tasks/main.yml +++ b/ansible/roles/ethlambda/tasks/main.yml @@ -31,7 +31,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: ethlambda_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/gean/tasks/main.yml b/ansible/roles/gean/tasks/main.yml index c89bfad..83fa44d 100644 --- a/ansible/roles/gean/tasks/main.yml +++ b/ansible/roles/gean/tasks/main.yml @@ -27,7 +27,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: gean_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/grandine/tasks/main.yml b/ansible/roles/grandine/tasks/main.yml index cf9cf72..b1018c7 100644 --- a/ansible/roles/grandine/tasks/main.yml +++ b/ansible/roles/grandine/tasks/main.yml @@ -31,7 +31,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: grandine_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/lantern/tasks/main.yml b/ansible/roles/lantern/tasks/main.yml index bb3696d..66b77b2 100644 --- a/ansible/roles/lantern/tasks/main.yml +++ b/ansible/roles/lantern/tasks/main.yml @@ -22,7 +22,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: lantern_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/lighthouse/tasks/main.yml b/ansible/roles/lighthouse/tasks/main.yml index cb8bd02..1491202 100644 --- a/ansible/roles/lighthouse/tasks/main.yml +++ b/ansible/roles/lighthouse/tasks/main.yml @@ -30,7 +30,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: lighthouse_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/nlean/tasks/main.yml b/ansible/roles/nlean/tasks/main.yml index 339b757..03b578c 100644 --- a/ansible/roles/nlean/tasks/main.yml +++ b/ansible/roles/nlean/tasks/main.yml @@ -27,7 +27,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: nlean_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/observability/tasks/main.yml b/ansible/roles/observability/tasks/main.yml index fcead33..545942b 100644 --- a/ansible/roles/observability/tasks/main.yml +++ b/ansible/roles/observability/tasks/main.yml @@ -2,16 +2,18 @@ # Observability role: Deploy cadvisor, node_exporter, prometheus, and promtail # alongside each lean node on remote hosts. -- name: Extract metricsPort from validator-config.yaml +- name: Find all nodes on this host shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .metricsPort" "{{ local_genesis_dir }}/validator-config.yaml" - register: obs_metrics_port_raw + yq eval '[.validators[] | select(.enrFields.ip == "{{ ansible_host }}") | {"name": .name, "metricsPort": .metricsPort}]' \ + "{{ local_validator_config_path }}" -o=json + register: colocated_nodes_raw changed_when: false delegate_to: localhost -- name: Set metricsPort fact +- name: Set colocated nodes fact set_fact: - obs_metrics_port: "{{ obs_metrics_port_raw.stdout | trim }}" + colocated_nodes: "{{ colocated_nodes_raw.stdout | from_json }}" + is_primary_node: "{{ (colocated_nodes_raw.stdout | from_json)[0].name == node_name }}" - name: Create observability config directory file: @@ -32,10 +34,13 @@ mode: '0644' # --- cadvisor (always recreate to ensure correct flags) --- +# Only the first colocated node on each machine manages infra containers +# to avoid parallel rm+run races when multiple nodes share a host. - name: Remove existing cadvisor container command: docker rm -f cadvisor failed_when: false + when: is_primary_node - name: Start cadvisor container command: >- @@ -55,12 +60,14 @@ -v /var/lib/docker/:/var/lib/docker:ro {{ cadvisor_image }} --port={{ cadvisor_port }} + when: is_primary_node # --- node_exporter (always recreate to ensure correct flags) --- - name: Remove existing node_exporter container command: docker rm -f node_exporter failed_when: false + when: is_primary_node - name: Start node_exporter container command: >- @@ -77,6 +84,7 @@ --path.sysfs=/host/sys --path.rootfs=/rootfs --web.listen-address=0.0.0.0:{{ node_exporter_port }} + when: is_primary_node # --- prometheus (always recreate to pick up config/mount changes, data persists on host) --- @@ -90,6 +98,7 @@ - name: Remove existing prometheus container command: docker rm -f prometheus failed_when: false + when: is_primary_node - name: Start prometheus container command: >- @@ -103,12 +112,14 @@ --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.retention.time=15d --web.listen-address=0.0.0.0:{{ prometheus_port }} + when: is_primary_node # --- promtail (always recreate to pick up config/mount changes) --- - name: Remove existing promtail container command: docker rm -f promtail failed_when: false + when: is_primary_node - name: Start promtail container command: >- @@ -122,3 +133,4 @@ {{ promtail_image }} -config.file=/etc/promtail/config.yml -server.http-listen-port={{ promtail_port }} + when: is_primary_node diff --git a/ansible/roles/observability/templates/prometheus.yml.j2 b/ansible/roles/observability/templates/prometheus.yml.j2 index b2e55bf..2449749 100644 --- a/ansible/roles/observability/templates/prometheus.yml.j2 +++ b/ansible/roles/observability/templates/prometheus.yml.j2 @@ -2,22 +2,34 @@ global: scrape_interval: 15s scrape_configs: - - job_name: '{{ node_name }}' +{% for node in colocated_nodes %} + - job_name: '{{ node.name }}' static_configs: - - targets: ['172.17.0.1:{{ obs_metrics_port }}'] + - targets: ['172.17.0.1:{{ node.metricsPort }}'] labels: type: 'app' - node_id: '{{ node_name }}' + instance: '{{ ansible_host }}' + network: '{{ network_name }}' + client_type: '{{ node.name.split("_")[0] }}' +{% endfor %} + - job_name: 'node_exporter' + static_configs: - targets: ['172.17.0.1:{{ node_exporter_port }}'] labels: type: 'node' - node_id: '{{ node_name }}' + instance: '{{ ansible_host }}' + network: '{{ network_name }}' + - job_name: 'cadvisor' + static_configs: - targets: ['172.17.0.1:{{ cadvisor_port }}'] labels: type: 'docker' - node_id: '{{ node_name }}' - relabel_configs: - - source_labels: [node_id] - target_label: instance + instance: '{{ ansible_host }}' + network: '{{ network_name }}' + metric_relabel_configs: + - source_labels: [name] + regex: '([a-z]+)_.*' + target_label: client_type + replacement: '$1' remote_write: - url: {{ remote_write_url }} diff --git a/ansible/roles/observability/templates/promtail.yml.j2 b/ansible/roles/observability/templates/promtail.yml.j2 index ebf0ea0..b399c1e 100644 --- a/ansible/roles/observability/templates/promtail.yml.j2 +++ b/ansible/roles/observability/templates/promtail.yml.j2 @@ -9,20 +9,25 @@ clients: - url: {{ loki_push_url }} scrape_configs: - - job_name: {{ node_name }} + - job_name: {{ ansible_host }} docker_sd_configs: - host: unix:///var/run/docker.sock refresh_interval: 5s filters: - name: name - values: ["{{ node_name }}"] + values: +{% for node in colocated_nodes %} + - "{{ node.name }}" +{% endfor %} relabel_configs: - source_labels: ['__meta_docker_container_name'] regex: '/(.*)' target_label: 'container' - source_labels: ['__meta_docker_container_log_stream'] target_label: 'stream' - - target_label: 'node' - replacement: '{{ node_name }}' - - target_label: 'host' + - source_labels: ['container'] + target_label: 'node' + - target_label: 'instance' replacement: '{{ ansible_host }}' + - target_label: 'network' + replacement: '{{ network_name }}' diff --git a/ansible/roles/peam/tasks/main.yml b/ansible/roles/peam/tasks/main.yml index bea56b0..1ab5a46 100644 --- a/ansible/roles/peam/tasks/main.yml +++ b/ansible/roles/peam/tasks/main.yml @@ -27,7 +27,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: peam_node_config changed_when: false delegate_to: localhost @@ -50,7 +50,7 @@ - name: Extract local validator index from validator-config ordering shell: | - yq eval '.validators[].name' "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" | nl -v0 | awk '$2=="{{ node_name }}" {print $1; exit}' + yq eval '.validators[].name' "{{ local_validator_config_path }}" | nl -v0 | awk '$2=="{{ node_name }}" {print $1; exit}' register: peam_validator_index changed_when: false delegate_to: localhost @@ -68,14 +68,14 @@ - name: Extract total validator count from validator-config.yaml shell: | - yq eval '.validators[].count // 1' "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" | awk '{sum += $1} END {print sum + 0}' + yq eval '.validators[].count // 1' "{{ local_validator_config_path }}" | awk '{sum += $1} END {print sum + 0}' register: peam_total_validator_count changed_when: false delegate_to: localhost - name: Extract attestation committee count from validator-config.yaml shell: | - yq eval '.config.attestation_committee_count // 1' "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval '.config.attestation_committee_count // 1' "{{ local_validator_config_path }}" register: peam_attestation_committee_count_raw changed_when: false delegate_to: localhost diff --git a/ansible/roles/qlean/tasks/main.yml b/ansible/roles/qlean/tasks/main.yml index 336f8bd..651a4a0 100644 --- a/ansible/roles/qlean/tasks/main.yml +++ b/ansible/roles/qlean/tasks/main.yml @@ -29,7 +29,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: qlean_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/ream/tasks/main.yml b/ansible/roles/ream/tasks/main.yml index a86ed4f..f088275 100644 --- a/ansible/roles/ream/tasks/main.yml +++ b/ansible/roles/ream/tasks/main.yml @@ -30,7 +30,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: ream_node_config changed_when: false delegate_to: localhost diff --git a/ansible/roles/zeam/tasks/main.yml b/ansible/roles/zeam/tasks/main.yml index 8f7f30b..a7bd04e 100644 --- a/ansible/roles/zeam/tasks/main.yml +++ b/ansible/roles/zeam/tasks/main.yml @@ -38,7 +38,7 @@ - name: Extract node configuration from validator-config.yaml shell: | - yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ local_validator_config_path }}" register: node_config changed_when: false delegate_to: localhost @@ -76,10 +76,6 @@ state: directory mode: '0755' -- name: Set validator config value - set_fact: - actual_validator_config: "{{ validator_config if validator_config is defined and validator_config != '' else 'genesis_bootnode' }}" - - name: Deploy Zeam node using Docker block: - name: Stop existing Zeam container (if any) @@ -106,7 +102,7 @@ {{ zeam_global_flags }} node --custom_genesis /config - --validator_config {{ actual_validator_config }} + --validator_config /config --data-dir /data --node-id {{ node_name }} --node-key /config/{{ node_name }}.key diff --git a/generate-genesis.sh b/generate-genesis.sh index cd29b23..422c3db 100755 --- a/generate-genesis.sh +++ b/generate-genesis.sh @@ -15,7 +15,7 @@ PK_DOCKER_IMAGE="ethpandaops/eth-beacon-genesis:pk910-leanchain" # ======================================== show_usage() { cat << EOF -Usage: $0 [--mode local|ansible] [--offset ] [--forceKeyGen] +Usage: $0 [--mode local|ansible] [--offset ] [--forceKeyGen] [--validator-config ] Generate genesis configuration files using PK's eth-beacon-genesis tool. Generates: config.yaml, validators.yaml, nodes.yaml, genesis.json, genesis.ssz, and .key files @@ -31,6 +31,7 @@ Options: - ansible: GENESIS_TIME = now + 360 seconds (default) --offset Override genesis time offset in seconds (overrides mode defaults) --forceKeyGen Force regeneration of hash-sig validator keys + --validator-config Path to a custom validator-config.yaml (default: /validator-config.yaml) Examples: $0 local-devnet/genesis # Local mode (30s offset) @@ -118,6 +119,15 @@ while [[ $# -gt 0 ]]; do exit 1 fi ;; + --validator-config) + if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then + VALIDATOR_CONFIG_FILE="$2" + shift 2 + else + echo "❌ Error: --validator-config requires a path" + exit 1 + fi + ;; *) shift ;; @@ -403,6 +413,14 @@ echo " Docker image: $PK_DOCKER_IMAGE" echo " Command: leanchain" echo "" +# If validator config is external (not already inside genesis dir), copy it in +# so the Docker container can find it at the expected /data/genesis/validator-config.yaml path +GENESIS_VALIDATOR_CONFIG="$GENESIS_DIR/validator-config.yaml" +if [ "$VALIDATOR_CONFIG_FILE" != "$GENESIS_VALIDATOR_CONFIG" ]; then + cp "$VALIDATOR_CONFIG_FILE" "$GENESIS_VALIDATOR_CONFIG" + echo " Copied external validator config to genesis dir" +fi + # Convert to absolute path for docker volume mount GENESIS_DIR_ABS="$(cd "$GENESIS_DIR" && pwd)" PARENT_DIR_ABS="$(cd "$GENESIS_DIR/.." && pwd)" diff --git a/parse-env.sh b/parse-env.sh index 6dfc0d0..51e56e1 100755 --- a/parse-env.sh +++ b/parse-env.sh @@ -3,21 +3,20 @@ if [ -n "$NETWORK_DIR" ] then - echo "setting up network from $scriptDir/$NETWORK_DIR" - configDir="$scriptDir/$NETWORK_DIR/genesis" - dataDir="$scriptDir/$NETWORK_DIR/data" + # Support both absolute paths and relative paths (relative to scriptDir) + if [[ "$NETWORK_DIR" = /* ]]; then + _resolved_network_dir="$NETWORK_DIR" + else + _resolved_network_dir="$scriptDir/$NETWORK_DIR" + fi + echo "setting up network from $_resolved_network_dir" + configDir="$_resolved_network_dir/genesis" + dataDir="$_resolved_network_dir/data" else echo "set NETWORK_DIR env variable to run" exit fi; -# TODO: check for presense of all required files by filenames on configDir -if [ ! -n "$(ls -A $configDir)" ] -then - echo "no genesis config at path=$configDir, exiting." - exit -fi; - while [[ $# -gt 0 ]]; do key="$1" case $key in @@ -126,6 +125,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; + --network) + networkName="$2" + shift # past argument + shift # past value + ;; --logs) enableLogs=true shift @@ -143,6 +147,13 @@ then exit fi; +# Check genesis dir exists and is non-empty, unless --generateGenesis will create it +if [ "$generateGenesis" != "true" ] && [ ! -n "$(ls -A $configDir 2>/dev/null)" ] +then + echo "no genesis config at path=$configDir, exiting." + exit +fi; + # Validate --replace-with requires --restart-client if [[ -n "$replaceWith" ]] && [[ ! -n "$restartClient" ]]; then echo "Warning: --replace-with requires --restart-client. Ignoring --replace-with." @@ -162,6 +173,7 @@ fi; # freshStart logic removed - now handled by --generateGenesis flag +networkName="${networkName:-devnet-3}" echo "configDir = $configDir" echo "dataDir = $dataDir" @@ -179,4 +191,5 @@ echo "skipLeanpoint = ${skipLeanpoint:-false}" echo "skipNemo = ${skipNemo:-false}" echo "dryRun = ${dryRun:-false}" echo "replaceWith = ${replaceWith:-}" +echo "networkName = $networkName" echo "enableLogs = ${enableLogs:-false}" diff --git a/run-ansible.sh b/run-ansible.sh index 75d8267..4035df9 100755 --- a/run-ansible.sh +++ b/run-ansible.sh @@ -31,6 +31,7 @@ skipGenesis="${10}" # Set to "true" to skip genesis generation (e.g. when resta checkpointSyncUrl="${11}" # URL for checkpoint sync (when restarting with --restart-client) dryRun="${12}" # Set to "true" to run Ansible with --check --diff (no changes applied) syncAllHosts="${13}" # Set to "true" to sync config yamls to all hosts (used after --replace-with) +networkName="${14}" # Network label applied to all metrics (e.g. devnet-3, testnet, mainnet) # Determine SSH user: use root if --useRoot flag is set, otherwise use current user if [ "$useRoot" == "true" ]; then @@ -115,10 +116,9 @@ if [ -n "$validatorConfig" ] && [ "$validatorConfig" != "genesis_bootnode" ]; th EXTRA_VARS="$EXTRA_VARS validator_config=$validatorConfig" fi -# Pass the basename of the active validator config file so deploy-nodes.yml -# can sync the correct file (e.g. validator-config-subnets-2.yaml) to remotes. -validator_config_basename=$(basename "$validator_config_file") -EXTRA_VARS="$EXTRA_VARS validator_config_basename=$validator_config_basename" +# Pass the full local path of the active validator config so deploy-nodes.yml +# can sync the correct file regardless of where it lives on disk. +EXTRA_VARS="$EXTRA_VARS local_validator_config_path=$validator_config_file" if [ -n "$coreDumps" ]; then EXTRA_VARS="$EXTRA_VARS enable_core_dumps=$coreDumps" @@ -136,6 +136,8 @@ if [ "$syncAllHosts" == "true" ]; then EXTRA_VARS="$EXTRA_VARS sync_all_hosts=true" fi +EXTRA_VARS="$EXTRA_VARS network_name=$networkName" + # Determine deployment mode (docker/binary) - read default from group_vars/all.yml # Default to 'docker' if not specified in group_vars GROUP_VARS_FILE="$ANSIBLE_DIR/inventory/group_vars/all.yml" diff --git a/set-up.sh b/set-up.sh index fb556e9..c9c3362 100755 --- a/set-up.sh +++ b/set-up.sh @@ -16,6 +16,9 @@ if [ -n "$generateGenesis" ] || [ ! -f "$configDir/validators.yaml" ] || [ ! -f echo "🔧 Running genesis generator..." echo "================================================" + # Ensure genesis directory exists (may not exist when using an external NETWORK_DIR) + mkdir -p "$configDir" + # Find the genesis generator script genesis_generator="$scriptDir/generate-genesis.sh" @@ -24,8 +27,14 @@ if [ -n "$generateGenesis" ] || [ ! -f "$configDir/validators.yaml" ] || [ ! -f exit 1 fi + # Pass external validator config if provided (not the default genesis_bootnode sentinel) + _validator_config_flag="" + if [ -n "$validatorConfig" ] && [ "$validatorConfig" != "genesis_bootnode" ]; then + _validator_config_flag="--validator-config $validatorConfig" + fi + # Run the generator with deployment mode - if ! $genesis_generator "$configDir" --mode "$deployment_mode" $FORCE_KEYGEN_FLAG; then + if ! $genesis_generator "$configDir" --mode "$deployment_mode" $FORCE_KEYGEN_FLAG $_validator_config_flag; then echo "❌ Genesis generation failed!" exit 1 fi diff --git a/spin-node.sh b/spin-node.sh index db4987a..d590950 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -76,16 +76,27 @@ if [ "$enableLogs" == "true" ]; then _log_dir="$scriptDir/tmp" mkdir -p "$_log_dir" _log_start=$(date -u +%s) + _ts=$(date -u '+%d-%m-%Y-%H-%M') if [ "$deployment_mode" == "ansible" ]; then _log_prefix="ansible-run" + _config_prefix="ansible" else _log_prefix="local-run" + _config_prefix="local" fi - _log_file="$_log_dir/${_log_prefix}-$(date -u '+%d-%m-%Y-%H-%M').log" + _log_file="$_log_dir/${_log_prefix}-${_ts}.log" echo "$(date -u '+%Y-%m-%d %H:%M:%S') START spin-node.sh $_original_args" >> "$_log_dir/devnet.log" trap 'echo "$(date -u '\''+%Y-%m-%d %H:%M:%S'\'') END spin-node.sh ($(( $(date -u +%s) - _log_start ))s) -> '"$_log_file"'" >> "'"$_log_dir"'/devnet.log"' EXIT exec > >(tee -a "$_log_file") 2>&1 echo "Logging to $_log_file" + # Copy validator config with timestamped name matching the run log + if [ -n "$replaceWith" ]; then + _config_copy="$_log_dir/${_config_prefix}-${networkName}-validator-config-replace-${_ts}.yaml" + else + _config_copy="$_log_dir/${_config_prefix}-${networkName}-validator-config-${_ts}.yaml" + fi + cp "$validator_config_file" "$_config_copy" + echo "Validator config copied to $_config_copy" fi # If --subnets N is specified, expand the validator config template into a new @@ -177,7 +188,7 @@ if [ -n "$prepareMode" ] && [ "$prepareMode" == "true" ]; then echo "Preparing remote servers (verifying and installing required software)..." fi - if ! "$scriptDir/run-ansible.sh" "$configDir" "" "" "" "$validator_config_file" "$sshKeyFile" "$useRoot" "prepare" "" "" "" "$dryRun"; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "" "" "" "$validator_config_file" "$sshKeyFile" "$useRoot" "prepare" "" "" "" "$dryRun" "" "$networkName"; then echo "❌ Server preparation failed." exit 1 fi @@ -430,7 +441,7 @@ if [[ -n "$restartClient" ]]; then old_name="${replace_old_names[$idx]}" if [ "$deployment_mode" == "ansible" ]; then echo "Stopping $old_name and cleaning remote data via Ansible..." - "$scriptDir/run-ansible.sh" "$configDir" "$old_name" "true" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "" "true" "" || { + "$scriptDir/run-ansible.sh" "$configDir" "$old_name" "true" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "" "true" "" "" "" "$networkName" || { echo "Warning: Failed to stop $old_name via Ansible, continuing..." } else @@ -582,7 +593,7 @@ if [ "$deployment_mode" == "ansible" ]; then # Handle stop action if [ -n "$stopNodes" ] && [ "$stopNodes" == "true" ]; then echo "Stopping nodes via Ansible..." - if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "$coreDumps" "$ansible_skip_genesis" "" "$dryRun"; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "$coreDumps" "$ansible_skip_genesis" "" "$dryRun" "" "$networkName"; then echo "❌ Ansible stop operation failed. Exiting." exit 1 fi @@ -602,7 +613,7 @@ if [ "$deployment_mode" == "ansible" ]; then ansible_sync_all_hosts="" [[ "${has_replacements:-false}" = "true" ]] && ansible_sync_all_hosts="true" - if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$ansible_clean_data" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "" "$coreDumps" "$ansible_skip_genesis" "$ansible_checkpoint_url" "$dryRun" "$ansible_sync_all_hosts"; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$ansible_clean_data" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "" "$coreDumps" "$ansible_skip_genesis" "$ansible_checkpoint_url" "$dryRun" "$ansible_sync_all_hosts" "$networkName"; then echo "❌ Ansible deployment failed. Exiting." exit 1 fi @@ -629,7 +640,7 @@ if [ "$deployment_mode" == "ansible" ]; then _genesis_time=$(grep "GENESIS_TIME:" "$_genesis_config" | awk '{print $2}') if [ -n "$_genesis_time" ]; then echo "lean_genesis_time $_genesis_time" | curl -s --data-binary @- \ - "$_pushgateway_url/metrics/job/lean-quickstart" || \ + "$_pushgateway_url/metrics/job/lean-quickstart/network/$networkName" || \ echo "Warning: Failed to push lean_genesis_time to Pushgateway." fi fi