From 2a4d3846eb8c6bd04c624668a9edb3cd50070067 Mon Sep 17 00:00:00 2001
From: wangzishuai1987 <360711168@qq.com>
Date: Wed, 29 Apr 2026 13:17:03 +0000
Subject: [PATCH] feat(monitoring): add Prometheus config, Grafana dashboard, and compose overlay

Add monitoring stack for the Grey testnet:

- `monitoring/prometheus.yml`: scrapes all 6 validators on port 9615
- `monitoring/grafana/dashboards/grey.json`: pre-built dashboard with panels
  for block height, finality lag, peers, work packages, PVM gas, RPC latency,
  database size, gossipsub messages, and state transitions
- `docker-compose.monitoring.yml`: overlay that adds Prometheus + Grafana
  containers, connected to the testnet network

Usage:
  docker compose -f docker-compose.yml -f docker-compose.monitoring.yml up

Ref: jarchain/jar#231
---
 grey/docker-compose.monitoring.yml           |  55 +++++
 grey/monitoring/grafana/dashboards/grey.json | 211 +++++++++++++++++++
 grey/monitoring/prometheus.yml               |  24 ++
 3 files changed, 290 insertions(+)
 create mode 100644 grey/docker-compose.monitoring.yml
 create mode 100644 grey/monitoring/grafana/dashboards/grey.json
 create mode 100644 grey/monitoring/prometheus.yml

diff --git a/grey/docker-compose.monitoring.yml b/grey/docker-compose.monitoring.yml
new file mode 100644
index 000000000..fae1e1ba7
--- /dev/null
+++ b/grey/docker-compose.monitoring.yml
@@ -0,0 +1,55 @@
+# Docker Compose overlay for monitoring the Grey testnet.
+#
+# Usage (run from the directory containing both compose files):
+#   docker compose -f docker-compose.yml -f docker-compose.monitoring.yml up
+#
+# Starts Prometheus + Grafana alongside the 6-validator testnet.
+# Grafana opens the Grey JAM dashboard as its home dashboard.
+#
+# NOTE(review): no Prometheus datasource is provisioned here; until one is
+# added (e.g. http://prometheus:9090 in the Grafana UI) panels may be empty.
+#
+# The `grey-net` network is not declared in this overlay, so it is expected
+# to come from the base docker-compose.yml.
+
+services:
+  prometheus:
+    image: prom/prometheus:latest
+    container_name: grey-prometheus
+    volumes:
+      # Relative bind-mount paths resolve against the directory of the first
+      # -f file (docker-compose.yml), i.e. the directory holding monitoring/.
+      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus-data:/prometheus
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.retention.time=7d"
+    networks:
+      - grey-net
+    ports:
+      - "9090:9090"
+    restart: unless-stopped
+
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grey-grafana
+    environment:
+      # Defaults suit a local testnet only; override via the environment
+      # (GRAFANA_ADMIN_PASSWORD) before exposing port 3000 anywhere else.
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
+      - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/grey.json
+    volumes:
+      - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
+      - grafana-data:/var/lib/grafana
+    networks:
+      - grey-net
+    ports:
+      - "3000:3000"
+    depends_on:
+      - prometheus
+    restart: unless-stopped
+
+volumes:
+  prometheus-data:
+  grafana-data:
diff --git a/grey/monitoring/grafana/dashboards/grey.json b/grey/monitoring/grafana/dashboards/grey.json
new file mode 100644
index 000000000..7e8486ffa
--- /dev/null
+++ b/grey/monitoring/grafana/dashboards/grey.json
@@ -0,0 +1,211 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "description": "Grey JAM node monitoring dashboard",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "title": "Block Height",
+      "type": "stat",
+      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
+      "targets": [
+        {
+          "expr": "grey_block_height",
+          "legendFormat": "{{instance}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "thresholds" },
+          "thresholds": {
+            "steps": [
+              { "color": "red", "value": null },
+              { "color": "yellow", "value": 1 },
+              { "color": "green", "value": 10 }
+            ]
+          }
+        }
+      }
+    },
+    {
+      "title": "Finalized Height",
+      "type": "stat",
+      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
+      "targets": [
+        {
+          "expr": "grey_finalized_height",
+          "legendFormat": "{{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "Finality Lag",
+      "type": "gauge",
+      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
+      "targets": [
+        {
+          "expr": "grey_finality_lag",
+          "legendFormat": "{{instance}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "min": 0,
+          "max": 50,
+          "thresholds": {
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 10 },
+              { "color": "red", "value": 30 }
+            ]
+          },
+          "unit": "slots"
+        }
+      }
+    },
+    {
+      "title": "Connected Peers",
+      "type": "stat",
+      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
+      "targets": [
+        {
+          "expr": "grey_peer_count",
+          "legendFormat": "{{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "Blocks Produced / Imported",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
+      "targets": [
+        {
+          "expr": "rate(grey_blocks_produced_total[5m])",
+          "legendFormat": "produced {{instance}}"
+        },
+        {
+          "expr": "rate(grey_blocks_imported_total[5m])",
+          "legendFormat": "imported {{instance}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "blocks/s"
+        }
+      }
+    },
+    {
+      "title": "GRANDPA Round",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
+      "targets": [
+        {
+          "expr": "grey_grandpa_round",
+          "legendFormat": "{{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "Work Packages Submitted / Accumulated",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
+      "targets": [
+        {
+          "expr": "rate(grey_work_packages_submitted_total[5m])",
+          "legendFormat": "submitted {{instance}}"
+        },
+        {
+          "expr": "rate(grey_work_packages_accumulated_total[5m])",
+          "legendFormat": "accumulated {{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "PVM Gas Used",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
+      "targets": [
+        {
+          "expr": "rate(grey_pvm_gas_used_total[5m])",
+          "legendFormat": "gas/s {{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "State Transition Duration",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 },
+      "targets": [
+        {
+          "expr": "grey_state_transition_last_seconds",
+          "legendFormat": "{{instance}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s"
+        }
+      }
+    },
+    {
+      "title": "RPC Latency (p95)",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 },
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.95, sum(rate(grey_rpc_request_seconds_bucket[5m])) by (le, method))",
+          "legendFormat": "{{method}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "s"
+        }
+      }
+    },
+    {
+      "title": "Database Size",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 28 },
+      "targets": [
+        {
+          "expr": "grey_stored_blocks",
+          "legendFormat": "blocks {{instance}}"
+        },
+        {
+          "expr": "grey_stored_states",
+          "legendFormat": "states {{instance}}"
+        },
+        {
+          "expr": "grey_stored_chunks",
+          "legendFormat": "chunks {{instance}}"
+        }
+      ]
+    },
+    {
+      "title": "Gossipsub Messages",
+      "type": "timeseries",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 28 },
+      "targets": [
+        {
+          "expr": "rate(grey_gossipsub_messages_total[5m])",
+          "legendFormat": "{{topic}} {{instance}}"
+        }
+      ]
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["grey", "jam", "blockchain"],
+  "templating": { "list": [] },
+  "time": { "from": "now-1h", "to": "now" },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Grey JAM Node",
+  "uid": "grey-jam-node",
+  "version": 1
+}
diff --git a/grey/monitoring/prometheus.yml b/grey/monitoring/prometheus.yml
new file mode 100644
index 000000000..6b4562912
--- /dev/null
+++ b/grey/monitoring/prometheus.yml
@@ -0,0 +1,24 @@
+# Prometheus scrape configuration for Grey JAM node testnet.
+#
+# Scrapes all 6 validators in the docker-compose testnet.
+# Each validator exposes /metrics on port 9615.
+
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: "grey-validators"
+    static_configs:
+      # Target hostnames are assumed to match the validator container names
+      # in the base docker-compose.yml (not visible here) — TODO confirm.
+      - targets:
+          - "grey-validator-0:9615"
+          - "grey-validator-1:9615"
+          - "grey-validator-2:9615"
+          - "grey-validator-3:9615"
+          - "grey-validator-4:9615"
+          - "grey-validator-5:9615"
+        labels:
+          network: "testnet"
+    metrics_path: /metrics