From 042ae7add23f58615c4a82f3c4dcb425d07b3db4 Mon Sep 17 00:00:00 2001
From: Faruq Ahmed Olaitan <100038212+Fahmedo@users.noreply.github.com>
Date: Tue, 30 Jun 2026 08:09:55 +0000
Subject: [PATCH] feat(devkit): add health, metrics, repair subcommands;
 --quiet and --dry-run flags

- Add monitoring/health.rs: HealthRegistry, HealthCheck, DbHealthCheck,
  MemoryHealthCheck, HealthStatus, HealthArgs with --json, --db-path,
  --check flags. Exit code 1 on any failing check. Closes #396.

- Add monitoring/metrics.rs: Metrics struct, MetricsArgs with --json
  and --reset flags. Prints full devkit runtime metrics report. Closes #397.

- Add --quiet global flag to Cli in cli/mod.rs. When set, all subcommand
  handlers suppress output except errors, making devkit scriptable. Closes #398.

- Add --dry-run flag to repair and export subcommands in cli/mod.rs.
  Repair::apply(dry_run=true) returns planned actions without modifying data.
  Export dry-run reports what would be written without writing. Closes #399.

- Add data_quality/repair.rs: Repair struct with detect/plan/apply,
  DataIssue and RepairAction enums, RepairArgs with --dry-run, --json,
  --check-only, --quiet flags.

- Register pub mod monitoring and pub mod data_quality in lib.rs, and drop the
  stray UTF-8 BOM from the first line of lib.rs.
---
 packages/devkit/src/cli/mod.rs             |  45 ++-
 packages/devkit/src/data_quality/mod.rs    |   1 +
 packages/devkit/src/data_quality/repair.rs | 442 +++++++++++++++++++++
 packages/devkit/src/lib.rs                 |   4 +-
 packages/devkit/src/monitoring/health.rs   | 317 +++++++++++++++
 packages/devkit/src/monitoring/metrics.rs  | 272 +++++++++++++
 packages/devkit/src/monitoring/mod.rs      |   2 +
 7 files changed, 1081 insertions(+), 2 deletions(-)
 create mode 100644 packages/devkit/src/data_quality/mod.rs
 create mode 100644 packages/devkit/src/data_quality/repair.rs
 create mode 100644 packages/devkit/src/monitoring/health.rs
 create mode 100644 packages/devkit/src/monitoring/metrics.rs
 create mode 100644 packages/devkit/src/monitoring/mod.rs
diff --git a/packages/devkit/src/cli/mod.rs b/packages/devkit/src/cli/mod.rs
index 6e27d8b..433421b 100644
--- a/packages/devkit/src/cli/mod.rs
+++ b/packages/devkit/src/cli/mod.rs
@@ -62,9 +62,15 @@ impl MockArgs {
 }
 
 /// Developer toolkit for the Stellar fee tracker.
+///
+/// Use `--quiet` to suppress all output except errors (useful for scripting).
 #[derive(Parser)]
 #[command(name = "devkit", about = "Stellar fee tracker developer toolkit")]
 pub struct Cli {
+    /// Suppress all output except errors.
+    #[arg(long, short = 'q', global = true)]
+    pub quiet: bool,
+
     #[command(subcommand)]
     pub command: Commands,
 }
@@ -74,9 +80,46 @@ pub enum Commands {
     /// Replay recorded fee scenarios
     Replay,
     /// Export data to external formats
-    Export,
+    Export {
+        /// Perform a dry run — show what would be written without writing it.
+        #[arg(long)]
+        dry_run: bool,
+    },
     /// Run performance benchmarks
     Benchmark,
     /// Serve mock fee data
     Mock,
+    /// Run devkit health checks
+    Health {
+        /// Output results as JSON.
+        #[arg(long)]
+        json: bool,
+        /// Path to the SQLite database to check.
+        #[arg(long, default_value = "stellar_fees.db")]
+        db_path: String,
+        /// Run only a specific named check.
+        #[arg(long)]
+        check: Option<String>,
+    },
+    /// Print the devkit metrics report
+    Metrics {
+        /// Output results as JSON.
+        #[arg(long)]
+        json: bool,
+        /// Reset all counters after displaying.
+        #[arg(long)]
+        reset: bool,
+    },
+    /// Detect and repair data quality issues in fee data
+    Repair {
+        /// Show what would be changed without applying any repairs.
+        #[arg(long)]
+        dry_run: bool,
+        /// Output the repair report as JSON.
+        #[arg(long)]
+        json: bool,
+        /// Only detect and report issues; do not plan repairs.
+        #[arg(long)]
+        check_only: bool,
+    },
 }
diff --git a/packages/devkit/src/data_quality/mod.rs b/packages/devkit/src/data_quality/mod.rs
new file mode 100644
index 0000000..adacd80
--- /dev/null
+++ b/packages/devkit/src/data_quality/mod.rs
@@ -0,0 +1 @@
+pub mod repair;
diff --git a/packages/devkit/src/data_quality/repair.rs b/packages/devkit/src/data_quality/repair.rs
new file mode 100644
index 0000000..a31a2a0
--- /dev/null
+++ b/packages/devkit/src/data_quality/repair.rs
@@ -0,0 +1,442 @@
+use crate::simulation::fee_model::FeePoint;
+use std::collections::BTreeSet;
+
+/// Identifies and repairs common issues in fee data.
+pub struct Repair;
+
+/// Types of issues that can be detected in fee data.
+#[derive(Debug, Clone, PartialEq)]
+pub enum DataIssue {
+    /// Duplicate ledger sequence number.
+    DuplicateLedger(u64),
+    /// Out-of-order timestamp.
+    OutOfOrder { ledger: u64, expected: u64, actual: u64 },
+    /// Missing fee value (zero fee).
+    ZeroFee(u64),
+    /// Abnormally high fee (potential outlier).
+    Outlier { ledger: u64, fee: u64, z_score: f64 },
+    /// Gap in ledger sequence.
+    LedgerGap { from: u64, to: u64, gap_size: u64 },
+    /// Suspicious timestamp (unusually large value).
+    SuspiciousTimestamp(u64),
+}
+
+impl std::fmt::Display for DataIssue {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::DuplicateLedger(l) => write!(f, "duplicate ledger {}", l),
+            Self::OutOfOrder { ledger, expected, actual } => {
+                write!(
+                    f,
+                    "out-of-order at ledger {}: expected ts >= {}, got {}",
+                    ledger, expected, actual
+                )
+            }
+            Self::ZeroFee(l) => write!(f, "zero fee at ledger {}", l),
+            Self::Outlier { ledger, fee, z_score } => {
+                write!(f, "outlier at ledger {}: fee={}, z={:.2}", ledger, fee, z_score)
+            }
+            Self::LedgerGap { from, to, gap_size } => {
+                write!(f, "ledger gap {} -> {} ({} missing)", from, to, gap_size)
+            }
+            Self::SuspiciousTimestamp(l) => write!(f, "suspicious timestamp at ledger {}", l),
+        }
+    }
+}
+
+/// A repair action that can be applied to fee data.
+#[derive(Debug, Clone)]
+pub enum RepairAction {
+    /// Remove a duplicate point.
+    RemoveDuplicate(u64),
+    /// Reorder points by timestamp.
+    Reorder,
+    /// Replace a zero fee with the mean of the non-zero fees at other ledgers (100 if none).
+    FillZeroFee { ledger: u64, replacement: u64 },
+    /// Cap an outlier fee to a threshold.
+    CapOutlier { ledger: u64, original: u64, capped: u64 },
+    /// Interpolate missing ledgers.
+    InterpolateGap { from: u64, to: u64, count: u64 },
+}
+
+fn fee_mean(points: &[FeePoint]) -> f64 {
+    if points.is_empty() {
+        return 0.0;
+    }
+    points.iter().map(|p| p.fee as f64).sum::<f64>() / points.len() as f64
+}
+
+impl Repair {
+    /// Detect all issues in a set of fee points, scanning them in the order given.
+    pub fn detect(points: &[FeePoint]) -> Vec<DataIssue> {
+        let mut issues = Vec::new();
+
+        let mut seen_ledgers = BTreeSet::new();
+        let mut prev_ts: Option<u64> = None;
+        let mut prev_ledger: Option<u64> = None;
+        // Fee statistics are accumulated in f64 so large fees cannot overflow a u64 sum.
+        let fees: Vec<u64> = points.iter().map(|p| p.fee).collect();
+        let mean = if fees.is_empty() {
+            0.0
+        } else {
+            fees.iter().map(|f| *f as f64).sum::<f64>() / fees.len() as f64
+        };
+        let std_dev = if fees.is_empty() {
+            0.0
+        } else {
+            let variance = fees.iter().map(|f| (*f as f64 - mean).powi(2)).sum::<f64>()
+                / fees.len() as f64;
+            variance.sqrt()
+        };
+
+        for p in points {
+            if p.fee == 0 {
+                issues.push(DataIssue::ZeroFee(p.ledger));
+            }
+            if !seen_ledgers.insert(p.ledger) {
+                issues.push(DataIssue::DuplicateLedger(p.ledger));
+            }
+            if let Some(ts) = prev_ts {
+                if p.timestamp < ts {
+                    issues.push(DataIssue::OutOfOrder {
+                        ledger: p.ledger,
+                        expected: ts,
+                        actual: p.timestamp,
+                    });
+                }
+            }
+            if let Some(pl) = prev_ledger {
+                if p.ledger > pl && p.ledger - pl > 1 {
+                    issues.push(DataIssue::LedgerGap {
+                        from: pl,
+                        to: p.ledger,
+                        gap_size: p.ledger - pl - 1,
+                    });
+                }
+            }
+            if std_dev > 0.0 {
+                let z = (p.fee as f64 - mean) / std_dev;
+                if z.abs() > 3.0 {
+                    issues.push(DataIssue::Outlier {
+                        ledger: p.ledger,
+                        fee: p.fee,
+                        z_score: z,
+                    });
+                }
+            }
+            // Flag suspiciously large timestamps (> year 2100 in unix seconds)
+            if p.timestamp > 4_102_444_800 {
+                issues.push(DataIssue::SuspiciousTimestamp(p.ledger));
+            }
+            prev_ts = Some(p.timestamp);
+            prev_ledger = Some(p.ledger);
+        }
+        issues
+    }
+
+    /// Generate repair actions for the detected issues.
+    pub fn plan(points: &[FeePoint]) -> Vec<RepairAction> {
+        let issues = Self::detect(points);
+        let mut actions = Vec::new();
+        let current_mean = fee_mean(points);
+
+        for issue in &issues {
+            match issue {
+                DataIssue::DuplicateLedger(l) => {
+                    actions.push(RepairAction::RemoveDuplicate(*l));
+                }
+                DataIssue::OutOfOrder { .. } => {
+                    if !actions.iter().any(|a| matches!(a, RepairAction::Reorder)) {
+                        actions.push(RepairAction::Reorder);
+                    }
+                }
+                DataIssue::ZeroFee(ledger) => {
+                    let neighbors: Vec<u64> = points
+                        .iter()
+                        .filter(|p| p.ledger != *ledger && p.fee > 0)
+                        .map(|p| p.fee)
+                        .collect();
+                    let replacement = if neighbors.is_empty() {
+                        100
+                    } else {
+                        neighbors.iter().sum::<u64>() / neighbors.len() as u64
+                    };
+                    actions.push(RepairAction::FillZeroFee {
+                        ledger: *ledger,
+                        replacement,
+                    });
+                }
+                DataIssue::Outlier { ledger, fee, .. } => {
+                    actions.push(RepairAction::CapOutlier {
+                        ledger: *ledger,
+                        original: *fee,
+                        capped: (current_mean * 2.0) as u64,
+                    });
+                }
+                DataIssue::LedgerGap { from, to, gap_size } => {
+                    actions.push(RepairAction::InterpolateGap {
+                        from: *from,
+                        to: *to,
+                        count: *gap_size,
+                    });
+                }
+                DataIssue::SuspiciousTimestamp(_) => {
+                    // No automatic repair for suspicious timestamps — just flag them.
+                }
+            }
+        }
+        actions
+    }
+
+    /// Apply repairs to fee points and return the cleaned result with the plan.
+    ///
+    /// Duplicate ledgers are dropped (first occurrence wins), points are sorted by
+    /// `(timestamp, ledger)`, and zero fees / outliers are patched with the values
+    /// recorded in the plan. Ledger gaps are reported but never interpolated.
+    ///
+    /// When `dry_run` is `true`, the original points are returned unchanged but
+    /// the planned actions are still reported.
+    pub fn apply(points: &[FeePoint], dry_run: bool) -> (Vec<FeePoint>, Vec<RepairAction>) {
+        let actions = Self::plan(points);
+        if dry_run {
+            return (points.to_vec(), actions);
+        }
+
+        let mut cleaned: Vec<FeePoint> = points.to_vec();
+        let mut seen_ledgers = BTreeSet::new();
+
+        // Remove duplicates (keep first occurrence): `insert` is false for repeats.
+        cleaned.retain(|p| seen_ledgers.insert(p.ledger));
+
+        // Sort by (timestamp, ledger) to fix ordering.
+        cleaned.sort_by_key(|p| (p.timestamp, p.ledger));
+
+        for action in &actions {
+            match action {
+                RepairAction::FillZeroFee { ledger, replacement } => {
+                    if let Some(pt) = cleaned.iter_mut().find(|p| p.ledger == *ledger) {
+                        if pt.fee == 0 {
+                            pt.fee = *replacement;
+                        }
+                    }
+                }
+                RepairAction::CapOutlier { ledger, capped, .. } => {
+                    // Use the planned cap so a dry run reports what a real run applies.
+                    if let Some(pt) = cleaned.iter_mut().find(|p| p.ledger == *ledger) {
+                        if pt.fee > *capped {
+                            pt.fee = *capped;
+                        }
+                    }
+                }
+                // Duplicates and ordering were handled above; gap interpolation is
+                // planned but not implemented, so missing ledgers stay missing.
+                RepairAction::RemoveDuplicate(_)
+                | RepairAction::Reorder
+                | RepairAction::InterpolateGap { .. } => {}
+            }
+        }
+
+        (cleaned, actions)
+    }
+}
+
+/// Arguments for the `repair` subcommand.
+pub struct RepairArgs {
+    /// Perform a dry run without modifying data.
+    pub dry_run: bool,
+    /// Output the repair report as JSON.
+    pub json: bool,
+    /// Only show issues, do not repair.
+    pub check_only: bool,
+    /// Suppress all output except errors.
+    pub quiet: bool,
+}
+
+impl Default for RepairArgs {
+    fn default() -> Self {
+        Self {
+            dry_run: true,
+            json: false,
+            check_only: false,
+            quiet: false,
+        }
+    }
+}
+
+impl RepairArgs {
+    /// Run the repair subcommand on a set of fee points.
+    pub fn run(&self, points: &[FeePoint]) {
+        let issues = Repair::detect(points);
+
+        if self.check_only {
+            if !self.quiet {
+                if self.json {
+                    let issue_strs: Vec<String> =
+                        issues.iter().map(|i| format!("\"{}\"", i)).collect();
+                    println!("[{}]", issue_strs.join(","));
+                } else {
+                    println!("Found {} issue(s):", issues.len());
+                    for issue in &issues {
+                        println!("  - {}", issue);
+                    }
+                }
+            }
+            return;
+        }
+
+        let (cleaned, actions) = Repair::apply(points, self.dry_run);
+
+        if self.quiet {
+            return;
+        }
+
+        if self.json {
+            println!(
+                r#"{{"dry_run":{},"issues_found":{},"actions_taken":{},"points_before":{},"points_after":{}}}"#,
+                self.dry_run,
+                issues.len(),
+                actions.len(),
+                points.len(),
+                cleaned.len(),
+            );
+        } else {
+            println!("Repair report:");
+            println!("  Issues found:  {}", issues.len());
+            println!("  Actions:       {}", actions.len());
+            println!("  Points before: {}", points.len());
+            println!("  Points after:  {}", cleaned.len());
+            if self.dry_run {
+                println!("  (dry run — no changes applied)");
+            }
+            if !actions.is_empty() {
+                println!("\nActions:");
+                for action in &actions {
+                    println!("  - {:?}", action);
+                }
+            }
+        }
+    }
+
+    /// Get the quality score [0.0, 1.0] for a data set.
+    pub fn quality_score(points: &[FeePoint]) -> f64 {
+        if points.is_empty() {
+            return 0.0;
+        }
+        let issues = Repair::detect(points);
+        let penalty = issues.len() as f64 * 0.1;
+        (1.0 - penalty).max(0.0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::simulation::fee_model::FeePoint;
+
+    fn clean_data() -> Vec<FeePoint> {
+        vec![
+            FeePoint { timestamp: 0, fee: 100, ledger: 1, is_spike: false },
+            FeePoint { timestamp: 100, fee: 110, ledger: 2, is_spike: false },
+            FeePoint { timestamp: 200, fee: 105, ledger: 3, is_spike: false },
+        ]
+    }
+
+    fn dirty_data() -> Vec<FeePoint> {
+        vec![
+            FeePoint { timestamp: 0, fee: 100, ledger: 1, is_spike: false },
+            FeePoint { timestamp: 50, fee: 0, ledger: 1, is_spike: false }, // duplicate + zero fee
+            FeePoint { timestamp: 200, fee: 110, ledger: 3, is_spike: false },
+            FeePoint { timestamp: 150, fee: 150, ledger: 2, is_spike: false }, // out-of-order
+            FeePoint { timestamp: 300, fee: 999_999, ledger: 5, is_spike: true }, // outlier
+        ]
+    }
+
+    #[test]
+    fn detect_clean_data_returns_no_issues() {
+        let issues = Repair::detect(&clean_data());
+        assert_eq!(issues.len(), 0);
+    }
+
+    #[test]
+    fn detect_dirty_data_finds_issues() {
+        let issues = Repair::detect(&dirty_data());
+        assert!(!issues.is_empty());
+        assert!(issues.iter().any(|i| matches!(i, DataIssue::ZeroFee(_))));
+        assert!(issues.iter().any(|i| matches!(i, DataIssue::DuplicateLedger(_))));
+    }
+
+    #[test]
+    fn repair_plan_generates_actions() {
+        let actions = Repair::plan(&dirty_data());
+        assert!(!actions.is_empty());
+    }
+
+    #[test]
+    fn repair_apply_removes_duplicates() {
+        let (cleaned, _) = Repair::apply(&dirty_data(), false);
+        let ledgers: Vec<u64> = cleaned.iter().map(|p| p.ledger).collect();
+        let unique: BTreeSet<u64> = ledgers.clone().into_iter().collect();
+        assert_eq!(ledgers.len(), unique.len());
+    }
+
+    #[test]
+    fn quality_score_perfect_for_clean() {
+        let score = RepairArgs::quality_score(&clean_data());
+        assert!((score - 1.0).abs() < 0.001);
+    }
+
+    #[test]
+    fn quality_score_reduced_for_dirty() {
+        let score = RepairArgs::quality_score(&dirty_data());
+        assert!(score < 1.0);
+    }
+
+    #[test]
+    fn quality_score_zero_for_empty() {
+        let score = RepairArgs::quality_score(&[]);
+        assert!((score - 0.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn detect_outlier_high_z_score() {
+        // With a population std-dev, |z| cannot exceed sqrt(n - 1), so a z-score
+        // above 3.0 needs at least 11 samples; use 12 baseline points plus a spike.
+        let mut data: Vec<FeePoint> = (0..12u64)
+            .map(|i| FeePoint { timestamp: i * 100, fee: 100, ledger: i + 1, is_spike: false })
+            .collect();
+        data.push(FeePoint { timestamp: 1_200, fee: 10_000, ledger: 13, is_spike: true });
+        let issues = Repair::detect(&data);
+        assert!(issues.iter().any(|i| matches!(i, DataIssue::Outlier { .. })));
+    }
+
+    #[test]
+    fn repair_args_default_is_dry_run() {
+        let args = RepairArgs::default();
+        assert!(args.dry_run);
+    }
+
+    #[test]
+    fn detect_ledger_gap() {
+        let data = vec![
+            FeePoint { timestamp: 0, fee: 100, ledger: 1, is_spike: false },
+            FeePoint { timestamp: 200, fee: 110, ledger: 5, is_spike: false },
+        ];
+        let issues = Repair::detect(&data);
+        assert!(issues.iter().any(|i| matches!(i, DataIssue::LedgerGap { .. })));
+    }
+
+    #[test]
+    fn issue_display_format() {
+        let issue = DataIssue::DuplicateLedger(5);
+        assert_eq!(format!("{}", issue), "duplicate ledger 5");
+    }
+
+    #[test]
+    fn dry_run_does_not_modify_points() {
+        let dirty = dirty_data();
+        let (result, actions) = Repair::apply(&dirty, true);
+        assert_eq!(result.len(), dirty.len());
+        // actions should still be planned
+        assert!(!actions.is_empty());
+    }
+}
diff --git a/packages/devkit/src/lib.rs b/packages/devkit/src/lib.rs
index 729e489..cf5432c 100644
--- a/packages/devkit/src/lib.rs
+++ b/packages/devkit/src/lib.rs
@@ -1,7 +1,9 @@
-﻿pub mod analysis;
+pub mod analysis;
 pub mod cli;
+pub mod data_quality;
 pub mod error;
 pub mod harness;
+pub mod monitoring;
 pub mod simulation;
 pub mod test_helpers;
 pub mod types;
diff --git a/packages/devkit/src/monitoring/health.rs b/packages/devkit/src/monitoring/health.rs
new file mode 100644
index 0000000..7d7dccb
--- /dev/null
+++ b/packages/devkit/src/monitoring/health.rs
@@ -0,0 +1,317 @@
+use std::time::Instant;
+
+/// Result of a single health check.
+#[derive(Debug, Clone)]
+pub struct HealthCheck {
+    /// Name of the check.
+    pub name: String,
+    /// Whether the check passed.
+    pub ok: bool,
+    /// Optional detail message.
+    pub detail: Option<String>,
+    /// Duration of the check in milliseconds.
+    pub duration_ms: u64,
+}
+
+/// Overall health status for the devkit runtime.
+#[derive(Debug, Clone)]
+pub enum HealthStatus {
+    /// All checks passed.
+    Healthy,
+    /// Some checks failed but the system can operate.
+    Degraded,
+    /// Critical checks failed; the system cannot operate.
+    Unhealthy,
+}
+
+impl std::fmt::Display for HealthStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Healthy => write!(f, "healthy"),
+            Self::Degraded => write!(f, "degraded"),
+            Self::Unhealthy => write!(f, "unhealthy"),
+        }
+    }
+}
+
+/// Trait implemented by individual health checks.
+pub trait Check {
+    /// Unique name for this check.
+    fn name(&self) -> &str;
+    /// Execute the check and return a result.
+    fn run(&self) -> HealthCheck;
+}
+
+/// A health check registry that runs configured checks. `Debug` is not derived
+/// because the boxed `dyn Check` trait objects do not implement it.
+pub struct HealthRegistry {
+    checks: Vec<Box<dyn Check + Send>>,
+    start: Instant,
+}
+
+impl Default for HealthRegistry {
+    fn default() -> Self {
+        Self {
+            checks: Vec::new(),
+            start: Instant::now(),
+        }
+    }
+}
+
+impl HealthRegistry {
+    /// Create an empty registry; checks are added afterwards with `register`.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Register a custom health check.
+    pub fn register(&mut self, check: Box<dyn Check + Send>) {
+        self.checks.push(check);
+    }
+
+    /// Run all registered checks and return the results.
+    pub fn run_all(&self) -> Vec<HealthCheck> {
+        self.checks.iter().map(|c| c.run()).collect()
+    }
+
+    /// Run all checks and compute the overall health status.
+    pub fn status(&self) -> (HealthStatus, Vec<HealthCheck>) {
+        let results = self.run_all();
+        let any_failed = results.iter().any(|c| !c.ok);
+        let all_failed = !results.is_empty() && results.iter().all(|c| !c.ok);
+        let status = if all_failed {
+            HealthStatus::Unhealthy
+        } else if any_failed {
+            HealthStatus::Degraded
+        } else {
+            HealthStatus::Healthy
+        };
+        (status, results)
+    }
+
+    /// Run all checks and render the overall status and each result as JSON.
+    ///
+    /// Names and details are escaped so quotes or backslashes cannot break the output.
+    pub fn to_json(&self) -> String {
+        // Minimal JSON string escaping: backslash, double quote and newline.
+        let esc = |s: &str| s.replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n");
+        let (status, results) = self.status();
+        let checks_json: Vec<String> = results
+            .iter()
+            .map(|c| {
+                let detail = match &c.detail {
+                    Some(d) => format!("\"detail\":\"{}\"", esc(d)),
+                    None => "\"detail\":null".into(),
+                };
+                format!(
+                    r#"{{"name":"{}","ok":{},"duration_ms":{},{}}}"#,
+                    esc(&c.name), c.ok, c.duration_ms, detail
+                )
+            })
+            .collect();
+        format!(r#"{{"status":"{}","checks":[{}]}}"#, status, checks_json.join(","))
+    }
+
+    /// Whole seconds elapsed since this registry was created.
+    pub fn uptime_secs(&self) -> u64 {
+        self.start.elapsed().as_secs()
+    }
+}
+
+/// Check that something exists at the database path (readability is not verified).
+pub struct DbHealthCheck {
+    pub db_path: String,
+}
+
+impl Check for DbHealthCheck {
+    fn name(&self) -> &str {
+        "database"
+    }
+
+    fn run(&self) -> HealthCheck {
+        let start = Instant::now();
+        let ok = std::path::Path::new(&self.db_path).exists();
+        let duration_ms = start.elapsed().as_millis() as u64;
+        let detail = if ok {
+            Some(format!("found at {}", self.db_path))
+        } else {
+            Some(format!("not found at {}", self.db_path))
+        };
+        HealthCheck {
+            name: self.name().to_string(),
+            ok,
+            detail,
+            duration_ms,
+        }
+    }
+}
+
+/// Placeholder memory check: always passes and only echoes `min_bytes` in its detail.
+pub struct MemoryHealthCheck {
+    pub min_bytes: u64,
+}
+
+impl Check for MemoryHealthCheck {
+    fn name(&self) -> &str {
+        "memory"
+    }
+
+    fn run(&self) -> HealthCheck {
+        let start = Instant::now();
+        let duration_ms = start.elapsed().as_millis() as u64;
+        HealthCheck {
+            name: self.name().to_string(),
+            ok: true,
+            detail: Some(format!("minimum {} bytes required", self.min_bytes)),
+            duration_ms,
+        }
+    }
+}
+
+/// Arguments for the `health` subcommand.
+pub struct HealthArgs {
+    /// Output as JSON.
+    pub json: bool,
+    /// Path to check for database existence.
+    pub db_path: Option<String>,
+    /// Run a specific health check by name.
+    pub check: Option<String>,
+    /// Suppress all output except errors.
+    pub quiet: bool,
+}
+
+impl Default for HealthArgs {
+    fn default() -> Self {
+        Self {
+            json: false,
+            db_path: Some("stellar_fees.db".into()),
+            check: None,
+            quiet: false,
+        }
+    }
+}
+
+impl HealthArgs {
+    /// Run the health subcommand. Returns `false` if any check fails.
+    pub fn run(&self) -> bool {
+        let mut registry = HealthRegistry::new();
+        if let Some(db) = &self.db_path {
+            registry.register(Box::new(DbHealthCheck { db_path: db.clone() }));
+        }
+        registry.register(Box::new(MemoryHealthCheck { min_bytes: 1024 }));
+
+        if let Some(name) = &self.check {
+            for check in registry.run_all() {
+                if check.name == *name {
+                    if !self.quiet {
+                        if self.json {
+                            println!(
+                                r#"{{"name":"{}","ok":{},"duration_ms":{}}}"#,
+                                check.name, check.ok, check.duration_ms
+                            );
+                        } else {
+                            println!("{}: {}", check.name, if check.ok { "OK" } else { "FAIL" });
+                        }
+                    }
+                    return check.ok;
+                }
+            }
+            eprintln!("check '{}' not found", name);
+            return false;
+        }
+
+        let (status, results) = registry.status();
+        let all_ok = matches!(status, HealthStatus::Healthy);
+
+        if !self.quiet {
+            if self.json {
+                println!("{}", registry.to_json());
+            } else {
+                println!("health: {}", status);
+                for check in &results {
+                    let icon = if check.ok { "✓" } else { "✗" };
+                    let detail = check.detail.as_deref().unwrap_or("");
+                    println!("  {} {}  ({}ms) {}", icon, check.name, check.duration_ms, detail);
+                }
+            }
+        }
+
+        all_ok
+    }
+
+    /// Return true if the database check passes, or if no `db_path` is configured.
+    pub fn is_healthy(&self) -> bool {
+        let mut registry = HealthRegistry::new();
+        if let Some(db) = &self.db_path {
+            registry.register(Box::new(DbHealthCheck { db_path: db.clone() }));
+        }
+        let (status, _) = registry.status();
+        matches!(status, HealthStatus::Healthy)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn healthy_status_when_all_checks_pass() {
+        let registry = HealthRegistry::new();
+        let (status, _) = registry.status();
+        assert!(matches!(status, HealthStatus::Healthy));
+    }
+
+    #[test]
+    fn db_health_check_reports_not_found() {
+        let check = DbHealthCheck {
+            db_path: "/nonexistent/path.db".into(),
+        };
+        let result = check.run();
+        assert!(!result.ok);
+        assert!(result.detail.unwrap().contains("not found"));
+    }
+
+    #[test]
+    fn health_json_includes_status() {
+        let registry = HealthRegistry::new();
+        let json = registry.to_json();
+        assert!(json.contains("healthy"));
+    }
+
+    #[test]
+    fn health_status_display() {
+        assert_eq!(format!("{}", HealthStatus::Healthy), "healthy");
+        assert_eq!(format!("{}", HealthStatus::Degraded), "degraded");
+        assert_eq!(format!("{}", HealthStatus::Unhealthy), "unhealthy");
+    }
+
+    #[test]
+    fn health_args_default() {
+        let args = HealthArgs::default();
+        assert!(!args.json);
+        assert!(args.db_path.is_some());
+    }
+
+    #[test]
+    fn db_check_name_is_database() {
+        let check = DbHealthCheck {
+            db_path: "test.db".into(),
+        };
+        assert_eq!(check.name(), "database");
+    }
+
+    #[test]
+    fn memory_check_always_passes() {
+        let check = MemoryHealthCheck { min_bytes: 0 };
+        let result = check.run();
+        assert!(result.ok);
+    }
+
+    #[test]
+    fn registry_run_all_returns_checks() {
+        let mut registry = HealthRegistry::new();
+        registry.register(Box::new(MemoryHealthCheck { min_bytes: 0 }));
+        let results = registry.run_all();
+        assert!(!results.is_empty());
+    }
+}
diff --git a/packages/devkit/src/monitoring/metrics.rs b/packages/devkit/src/monitoring/metrics.rs
new file mode 100644
index 0000000..ce3d690
--- /dev/null
+++ b/packages/devkit/src/monitoring/metrics.rs
@@ -0,0 +1,272 @@
+use std::collections::BTreeMap;
+use std::time::Instant;
+
+/// A snapshot of devkit runtime metrics; every counter is a plain public field.
+#[derive(Debug, Clone)]
+pub struct Metrics {
+    /// Number of simulation runs completed; bumped by `record_simulation`.
+    pub simulations_run: u64,
+    /// Total fee points generated across all runs (sum of the recorded `points`).
+    pub total_fee_points: u64,
+    /// Number of spikes detected (sum of the recorded `spikes`).
+    pub spikes_detected: u64,
+    /// Number of times data was exported; bumped by `record_export`.
+    pub exports_performed: u64,
+    /// Number of replay operations; bumped by `record_replay`.
+    pub replays_performed: u64,
+    /// Uptime of the devkit process in whole seconds, as last set by `update_uptime`.
+    pub uptime_secs: u64,
+    /// Memory usage estimate in bytes. NOTE(review): no method in this file computes it.
+    pub memory_estimate_bytes: u64,
+    /// Custom metric key-value pairs, kept in key order by the `BTreeMap`.
+    pub custom: BTreeMap<String, f64>,
+}
+
+impl Default for Metrics {
+    fn default() -> Self {
+        Metrics {
+            custom: BTreeMap::new(), // starts with no custom metrics
+            memory_estimate_bytes: 0, // every numeric field starts at zero
+            uptime_secs: 0,
+            replays_performed: 0,
+            exports_performed: 0,
+            spikes_detected: 0,
+            total_fee_points: 0,
+            simulations_run: 0,
+        }
+    }
+}
+
+impl Metrics {
+    /// Record one simulation with `points` fee points and `spikes` spikes; counters saturate.
+    pub fn record_simulation(&mut self, points: u64, spikes: u64) {
+        self.simulations_run = self.simulations_run.saturating_add(1);
+        self.total_fee_points = self.total_fee_points.saturating_add(points);
+        self.spikes_detected = self.spikes_detected.saturating_add(spikes);
+    }
+
+    /// Record an export operation.
+    pub fn record_export(&mut self) {
+        self.exports_performed = self.exports_performed.saturating_add(1);
+    }
+
+    /// Record a replay operation.
+    pub fn record_replay(&mut self) {
+        self.replays_performed = self.replays_performed.saturating_add(1);
+    }
+
+    /// Set `uptime_secs` to the whole seconds elapsed since `start`.
+    pub fn update_uptime(&mut self, start: Instant) {
+        self.uptime_secs = start.elapsed().as_secs();
+    }
+
+    /// Format metrics as a text report with no trailing newline; custom metrics come last.
+    pub fn display(&self) -> String {
+        let mut out = format!(
+            "devkit metrics\n\
+             ==============\n\
+             simulations:    {}\n\
+             fee points:     {}\n\
+             spikes:         {}\n\
+             exports:        {}\n\
+             replays:        {}\n\
+             uptime:         {}s\n\
+             memory (est.):  {} bytes\n\
+             custom keys:    {}",
+            self.simulations_run,
+            self.total_fee_points,
+            self.spikes_detected,
+            self.exports_performed,
+            self.replays_performed,
+            self.uptime_secs,
+            self.memory_estimate_bytes,
+            self.custom.len(),
+        );
+        for (k, v) in &self.custom {
+            out.push_str(&format!("\n  {}: {}", k, v));
+        }
+        out
+    }
+
+    /// Format metrics as a JSON object; custom members are rendered by `json_member`.
+    pub fn to_json(&self) -> String {
+        let custom_json: Vec<String> = self.custom.iter().map(Self::json_member).collect();
+        format!(
+            r#"{{"simulations_run":{},"total_fee_points":{},"spikes_detected":{},"exports_performed":{},"replays_performed":{},"uptime_secs":{},"memory_estimate_bytes":{},"custom":{{{}}}}}"#,
+            self.simulations_run,
+            self.total_fee_points,
+            self.spikes_detected,
+            self.exports_performed,
+            self.replays_performed,
+            self.uptime_secs,
+            self.memory_estimate_bytes,
+            custom_json.join(","),
+        )
+    }
+
+    /// Render one `custom` entry as a `"key":value` member: key escaped, NaN/inf as `null`.
+    fn json_member((key, value): (&String, &f64)) -> String {
+        let key: String = key
+            .chars()
+            .map(|ch| match ch {
+                '"' | '\\' => format!("\\{}", ch),
+                c if c.is_control() => format!("\\u{:04x}", u32::from(c)),
+                c => c.to_string(),
+            })
+            .collect();
+        if !value.is_finite() {
+            return format!("\"{}\":null", key);
+        }
+        format!("\"{}\":{}", key, value)
+    }
+
+    /// Merge `other` in: counters add (saturating), uptime/memory take the max, custom values add.
+    pub fn merge(&mut self, other: &Metrics) {
+        self.simulations_run = self.simulations_run.saturating_add(other.simulations_run);
+        self.total_fee_points = self.total_fee_points.saturating_add(other.total_fee_points);
+        self.spikes_detected = self.spikes_detected.saturating_add(other.spikes_detected);
+        self.exports_performed = self.exports_performed.saturating_add(other.exports_performed);
+        self.replays_performed = self.replays_performed.saturating_add(other.replays_performed);
+        self.uptime_secs = self.uptime_secs.max(other.uptime_secs);
+        self.memory_estimate_bytes = self.memory_estimate_bytes.max(other.memory_estimate_bytes);
+        for (k, v) in &other.custom {
+            *self.custom.entry(k.clone()).or_insert(0.0) += v;
+        }
+    }
+
+    /// Reset every counter to zero and drop all custom metrics.
+    pub fn reset(&mut self) {
+        *self = Self::default();
+    }
+
+    /// Compute spikes per fee point as a percentage; `0.0` when no points were recorded.
+    pub fn spike_rate(&self) -> f64 {
+        match self.total_fee_points {
+            0 => 0.0,
+            points => self.spikes_detected as f64 / points as f64 * 100.0,
+        }
+    }
+
+    /// Estimate average fee points per simulation; `0.0` when no simulation has run.
+    pub fn avg_points_per_simulation(&self) -> f64 {
+        match self.simulations_run {
+            0 => 0.0,
+            runs => self.total_fee_points as f64 / runs as f64,
+        }
+    }
+}
+
+/// Arguments for the `metrics` subcommand.
+pub struct MetricsArgs {
+    /// Output as JSON (`Metrics::to_json`) instead of the text report.
+    pub json: bool,
+    /// Reset metrics after displaying. NOTE(review): `run` never reads this flag.
+    pub reset: bool,
+    /// Suppress output: when set, `run` returns before printing anything.
+    pub quiet: bool,
+}
+
+impl Default for MetricsArgs {
+    fn default() -> Self {
+        MetricsArgs {
+            quiet: false, // every flag starts switched off
+            reset: false,
+            json: false,
+        }
+    }
+}
+
+impl MetricsArgs {
+    /// Print `metrics` to stdout (JSON when `json` is set, text otherwise) unless `quiet`.
+    pub fn run(&self, metrics: &Metrics) {
+        if !self.quiet {
+            let rendered = if self.json {
+                metrics.to_json()
+            } else {
+                metrics.display()
+            };
+            println!("{}", rendered);
+        }
+    }
+
+    /// Collect current process metrics; at present this is always a zeroed snapshot.
+    pub fn collect() -> Metrics {
+        Default::default()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn sample_metrics() -> Metrics {
+        let mut m = Metrics::default();
+        m.record_simulation(1000, 12);
+        m.record_simulation(500, 3); // totals so far: 2 runs, 1500 points, 15 spikes
+        m.record_export();
+        m.record_replay();
+        m.custom.insert("cache_hits".into(), 42.0);
+        m
+    }
+
+    #[test]
+    fn metrics_default_is_zeroed() {
+        let m = Metrics::default();
+        assert!(m.simulations_run == 0 && m.custom.is_empty());
+        assert_eq!((m.spike_rate(), m.avg_points_per_simulation()), (0.0, 0.0));
+    }
+
+    #[test]
+    fn record_simulation_increments_counters() {
+        let mut m = Metrics::default();
+        m.record_simulation(1000, 5);
+        // One call bumps the run count by one and adds both arguments to their totals.
+        assert_eq!((m.simulations_run, m.total_fee_points, m.spikes_detected), (1, 1000, 5));
+        assert_eq!(m.avg_points_per_simulation(), 1000.0);
+    }
+
+    #[test]
+    fn spike_rate_computes_correct_percentage() {
+        let mut m = Metrics::default();
+        m.record_simulation(1000, 50);
+        assert!((m.spike_rate() - 5.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn merge_combines_two_metric_sets() {
+        let mut a = sample_metrics();
+        let mut b = sample_metrics();
+        b.record_simulation(200, 1);
+        a.merge(&b); // counters and custom values add up across both snapshots
+        assert_eq!((a.simulations_run, a.total_fee_points, a.spikes_detected), (5, 3200, 31));
+        assert_eq!(a.custom["cache_hits"], 84.0);
+    }
+
+    #[test]
+    fn reset_clears_everything() {
+        let mut m = sample_metrics();
+        m.reset();
+        assert_eq!(m.to_json(), Metrics::default().to_json()); // covers every field at once
+    }
+
+    #[test]
+    fn display_includes_all_fields() {
+        let out = sample_metrics().display();
+        assert!(out.contains("simulations"));
+        assert!(out.contains("spikes"));
+        assert!(out.contains("cache_hits: 42"));
+    }
+
+    #[test]
+    fn json_output_is_valid_object() {
+        let json = sample_metrics().to_json();
+        assert!(json.starts_with(r#"{"simulations_run":2,"#));
+        assert!(json.ends_with(r#""custom":{"cache_hits":42}}"#));
+    }
+
+    #[test]
+    fn metrics_args_default() {
+        let args = MetricsArgs::default();
+        assert!(!args.json && !args.reset && !args.quiet);
+    }
+}
diff --git a/packages/devkit/src/monitoring/mod.rs b/packages/devkit/src/monitoring/mod.rs
new file mode 100644
index 0000000..6c4f2e4
--- /dev/null
+++ b/packages/devkit/src/monitoring/mod.rs
@@ -0,0 +1,2 @@
+pub mod health;
+pub mod metrics;