From 82a5588c5d7d507601456d094ca47c5fa272ec6f Mon Sep 17 00:00:00 2001 From: ns212 Date: Fri, 21 Mar 2025 07:35:51 +0000 Subject: [PATCH] feat(metrics): implement prometheus metrics endpoint --- .vscode/settings.json | 4 - Cargo.lock | 22 +++ p2pool/Cargo.toml | 1 + p2pool/src/server/http/server.rs | 1 + p2pool/src/server/http/stats/handlers.rs | 162 +++++++++++++++++++++++ 5 files changed, 186 insertions(+), 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 8e4651a7..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "workbench.preferredDarkColorTheme": "Visual Studio Dark - C++", - "workbench.colorTheme": "Tomorrow Night Blue" -} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 12969b21..2b462df7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5572,6 +5572,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror 1.0.69", +] + [[package]] name = "prometheus-client" version = "0.22.3" @@ -5701,6 +5716,12 @@ dependencies = [ "prost 0.13.4", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "qrcode" version = "0.12.0" @@ -6667,6 +6688,7 @@ dependencies = [ "minotari_app_grpc", "minotari_node_grpc_client", "num", + "prometheus", "rand", "rkv", "serde", diff --git a/p2pool/Cargo.toml b/p2pool/Cargo.toml index bdd4e92e..d46f5d23 100644 --- a/p2pool/Cargo.toml +++ b/p2pool/Cargo.toml @@ -69,6 +69,7 @@ lru = "0.12.5" tempfile = "3.14.0" rkv = { version = "0.19.0", features = ["lmdb"] } bincode = "1.3.3" +prometheus = "0.13.4" [package.metadata.cargo-machete] ignored = ["log4rs"] diff --git a/p2pool/src/server/http/server.rs b/p2pool/src/server/http/server.rs index 74783f3c..c4d45984 100644 --- a/p2pool/src/server/http/server.rs +++ b/p2pool/src/server/http/server.rs @@ -74,6 +74,7 @@ impl HttpServer { .route("/chain", get(handlers::handle_chain)) .route("/peer", get(handlers::handle_peers)) .route("/connections", get(handlers::handle_connections)) + .route("/metrics", get(handlers::handle_metrics)) .with_state(AppState { stats_client: self.stats_client.clone(), p2p_service_client: self.p2p_service_client.clone(), diff --git a/p2pool/src/server/http/stats/handlers.rs b/p2pool/src/server/http/stats/handlers.rs index 39e41016..a73f7277 100644 --- a/p2pool/src/server/http/stats/handlers.rs +++ b/p2pool/src/server/http/stats/handlers.rs @@ -6,9 +6,11 @@ use std::collections::HashMap; use axum::{ extract::{Query, State}, http::StatusCode, + response::Response, Json, }; use log::{error, info}; +use prometheus::{Encoder, Gauge, GaugeVec, IntGauge, Opts, Registry, TextEncoder}; use serde::Serialize; use tari_core::proof_of_work::PowAlgorithm; use tari_utilities::{epoch_time::EpochTime, hex::Hex}; @@ -21,6 +23,7 @@ use crate::server::{ }; const LOG_TARGET: &str = "tari::p2pool::server::stats::get"; +const METRICS_LOG_TARGET: &str = "tari::p2pool::server::stats::metrics"; #[derive(Serialize)] pub(crate) struct BlockResult { @@ -361,3 +364,162 @@ async fn get_chain_stats(state: AppState) -> Result<(GetStatsResponse, GetStatsR // .await, // ) // } +pub(crate) async fn handle_metrics(State(state): State) -> Response { + let timer = std::time::Instant::now(); + info!(target: METRICS_LOG_TARGET, "handle_metrics"); + + // Create a new registry + let registry = Registry::new(); + + // Get stats data + let stats_result = get_metrics_data(state, ®istry).await; + + // Generate the metrics output + let encoder = TextEncoder::new(); + let metric_families = registry.gather(); + let mut buffer = Vec::new(); + + if let Err(e) = encoder.encode(&metric_families, &mut buffer) { + error!(target: METRICS_LOG_TARGET, "Failed to encode metrics: {e:?}"); + return Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body(format!("Failed to encode metrics: {e:?}").into()) + .unwrap(); + } + + if timer.elapsed() > MAX_ACCEPTABLE_HTTP_TIMEOUT { + error!(target: METRICS_LOG_TARGET, "handle_metrics took too long: {}ms", timer.elapsed().as_millis()); + } + + // Return the metrics in Prometheus format + match stats_result { + Ok(_) => Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "text/plain; version=0.0.4") + .body(String::from_utf8_lossy(&buffer).to_string().into()) + .unwrap(), + Err(e) => Response::builder() + .status(StatusCode::INTERNAL_SERVER_ERROR) + .body(format!("Failed to collect metrics: {e:?}").into()) + .unwrap(), + } +} + +async fn get_metrics_data(state: AppState, registry: &Registry) -> Result<(), StatusCode> { + // Get full stats which includes chain stats and connection info + let stats = handle_get_stats(State(state.clone())).await?.0; + + // Get peer info + let (tx, rx) = oneshot::channel(); + state + .p2p_service_client + .send(P2pServiceQuery::GetPeers(tx)) + .await + .map_err(|error| { + error!(target: METRICS_LOG_TARGET, "Failed to get peers info: {error:?}"); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + let peers_info = rx.await.map_err(|error| { + error!(target: METRICS_LOG_TARGET, "Failed to receive peers info: {error:?}"); + StatusCode::INTERNAL_SERVER_ERROR + })?; + + // Register and set metrics + + // We'll use serde_json to convert the stats to a value we can extract fields from + let rx_stats_json = serde_json::to_value(&stats.randomx_stats).unwrap_or_default(); + let sha3x_stats_json = serde_json::to_value(&stats.sha3x_stats).unwrap_or_default(); + + // Chain height metrics + let chain_height = GaugeVec::new(Opts::new("p2pool_chain_height", "Current chain height"), &["algorithm"]).unwrap(); + registry.register(Box::new(chain_height.clone())).unwrap(); + + if let Some(height) = rx_stats_json.get("height").and_then(|v| v.as_u64()) { + chain_height.with_label_values(&["randomx"]).set(height as f64); + } + + if let Some(height) = sha3x_stats_json.get("height").and_then(|v| v.as_u64()) { + chain_height.with_label_values(&["sha3x"]).set(height as f64); + } + + // Shares metrics + let total_shares = GaugeVec::new(Opts::new("p2pool_total_shares", "Total number of shares"), &[ + "algorithm", + ]) + .unwrap(); + registry.register(Box::new(total_shares.clone())).unwrap(); + + if let Some(shares) = rx_stats_json.get("total_shares").and_then(|v| v.as_u64()) { + total_shares.with_label_values(&["randomx"]).set(shares as f64); + } + + if let Some(shares) = sha3x_stats_json.get("total_shares").and_then(|v| v.as_u64()) { + total_shares.with_label_values(&["sha3x"]).set(shares as f64); + } + + // My shares metrics + let my_shares = GaugeVec::new(Opts::new("p2pool_my_shares", "Number of my shares"), &["algorithm"]).unwrap(); + registry.register(Box::new(my_shares.clone())).unwrap(); + + if let Some(shares) = rx_stats_json.get("num_my_shares").and_then(|v| v.as_u64()) { + my_shares.with_label_values(&["randomx"]).set(shares as f64); + } + + if let Some(shares) = sha3x_stats_json.get("num_my_shares").and_then(|v| v.as_u64()) { + my_shares.with_label_values(&["sha3x"]).set(shares as f64); + } + + // Connection metrics + let connected_peers = Gauge::new("p2pool_connected_peers", "Number of connected peers").unwrap(); + registry.register(Box::new(connected_peers.clone())).unwrap(); + connected_peers.set(stats.connection_info.connected_peers as f64); + + // Connection counters + let pending_incoming = Gauge::new("p2pool_pending_incoming", "Number of pending incoming connections").unwrap(); + registry.register(Box::new(pending_incoming.clone())).unwrap(); + pending_incoming.set(stats.connection_info.network_info.connection_counters.pending_incoming as f64); + + let pending_outgoing = Gauge::new("p2pool_pending_outgoing", "Number of pending outgoing connections").unwrap(); + registry.register(Box::new(pending_outgoing.clone())).unwrap(); + pending_outgoing.set(stats.connection_info.network_info.connection_counters.pending_outgoing as f64); + + let established_incoming = Gauge::new( + "p2pool_established_incoming", + "Number of established incoming connections", + ) + .unwrap(); + registry.register(Box::new(established_incoming.clone())).unwrap(); + established_incoming.set( + stats + .connection_info + .network_info + .connection_counters + .established_incoming as f64, + ); + + let established_outgoing = Gauge::new( + "p2pool_established_outgoing", + "Number of established outgoing connections", + ) + .unwrap(); + registry.register(Box::new(established_outgoing.clone())).unwrap(); + established_outgoing.set( + stats + .connection_info + .network_info + .connection_counters + .established_outgoing as f64, + ); + + // Peer list metrics + let whitelist_peers = IntGauge::new("p2pool_whitelist_peers", "Number of whitelisted peers").unwrap(); + registry.register(Box::new(whitelist_peers.clone())).unwrap(); + whitelist_peers.set(peers_info.0.len() as i64); + + let greylist_peers = IntGauge::new("p2pool_greylist_peers", "Number of greylisted peers").unwrap(); + registry.register(Box::new(greylist_peers.clone())).unwrap(); + greylist_peers.set(peers_info.1.len() as i64); + + Ok(()) +}