From 63cb18138c94853b0786bff5373f482874188e5c Mon Sep 17 00:00:00 2001 From: andriypolanski Date: Sat, 23 May 2026 06:12:09 -0400 Subject: [PATCH 1/2] fix(health): log SQLite health_log insert and prune failure --- crates/genie-health/src/checker.rs | 143 ++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 14 deletions(-) diff --git a/crates/genie-health/src/checker.rs b/crates/genie-health/src/checker.rs index 23d6707..50a625c 100644 --- a/crates/genie-health/src/checker.rs +++ b/crates/genie-health/src/checker.rs @@ -111,17 +111,7 @@ impl HealthMonitor { for (name, url) in &services { let status = check_http(name, url).await; - // Log to SQLite. - let _ = self.db.execute( - "INSERT INTO health_log (ts_ms, service, healthy, response_ms, error) VALUES (?1, ?2, ?3, ?4, ?5)", - rusqlite::params![ - ts_ms, - status.name, - status.healthy as i32, - status.response_ms, - status.error, - ], - ); + insert_health_log(&self.db, ts_ms, &status); if status.healthy { if self.failure_counts.remove(name).is_some() { @@ -147,9 +137,7 @@ impl HealthMonitor { // Prune logs older than 24h every ~120 checks (~1 hour at 30s interval). let cutoff = ts_ms.saturating_sub(24 * 3600 * 1000); - let _ = self - .db - .execute("DELETE FROM health_log WHERE ts_ms < ?1", [cutoff]); + prune_health_log(&self.db, cutoff); } async fn send_alert(&self, status: &ServiceStatus) { @@ -181,6 +169,38 @@ impl HealthMonitor { } } +fn insert_health_log(db: &Connection, ts_ms: u64, status: &ServiceStatus) { + if let Err(e) = db.execute( + "INSERT INTO health_log (ts_ms, service, healthy, response_ms, error) VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![ + ts_ms, + status.name, + status.healthy as i32, + status.response_ms, + status.error, + ], + ) { + tracing::error!( + service = %status.name, + error = %e, + "failed to insert health_log row" + ); + } +} + +fn prune_health_log(db: &Connection, cutoff_ts_ms: u64) { + if let Err(e) = db.execute( + "DELETE FROM health_log WHERE ts_ms < ?1", + [cutoff_ts_ms], + ) { + tracing::error!( + cutoff_ts_ms, + error = %e, + "failed to prune health_log rows" + ); + } +} + async fn check_http(name: &str, url: &str) -> ServiceStatus { let start = std::time::Instant::now(); @@ -367,4 +387,99 @@ mod tests { assert_eq!(llm_url, "http://127.0.0.1:9999/v1/health"); } + + fn open_test_db(dir: &std::path::Path) -> Connection { + let db_path = dir.join("health.db"); + let db = Connection::open(&db_path).unwrap(); + db.execute_batch( + " + CREATE TABLE IF NOT EXISTS health_log ( + ts_ms INTEGER NOT NULL, + service TEXT NOT NULL, + healthy INTEGER NOT NULL, + response_ms INTEGER NOT NULL, + error TEXT + ); + ", + ) + .unwrap(); + db + } + + #[test] + fn health_log_insert_and_prune_on_writable_db() { + let dir = std::env::temp_dir().join(format!( + "genie-health-writable-{}", + std::process::id() + )); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + + let db = open_test_db(&dir); + let status = ServiceStatus { + name: "core".into(), + url: "http://127.0.0.1:3000/api/health".into(), + healthy: true, + response_ms: 12, + error: None, + }; + + insert_health_log(&db, 1_000, &status); + insert_health_log(&db, 2_000, &status); + + let count: i64 = db + .query_row("SELECT COUNT(*) FROM health_log", [], |row| row.get(0)) + .unwrap(); + assert_eq!(count, 2); + + prune_health_log(&db, 1_500); + let count: i64 = db + .query_row("SELECT COUNT(*) FROM health_log", [], |row| row.get(0)) + .unwrap(); + assert_eq!(count, 1); + + let _ = std::fs::remove_dir_all(&dir); + } + + #[test] + #[cfg(unix)] + fn health_log_write_errors_do_not_panic_on_readonly_db() { + use std::os::unix::fs::PermissionsExt; + + let dir = std::env::temp_dir().join(format!("genie-health-readonly-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&dir); + std::fs::create_dir_all(&dir).unwrap(); + + let db_path = dir.join("health.db"); + { + let db = open_test_db(&dir); + drop(db); + } + + let mut perms = std::fs::metadata(&db_path).unwrap().permissions(); + perms.set_mode(0o444); + std::fs::set_permissions(&db_path, perms).unwrap(); + + let db = Connection::open_with_flags( + &db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY, + ) + .unwrap(); + + let status = ServiceStatus { + name: "core".into(), + url: "http://127.0.0.1:3000/api/health".into(), + healthy: false, + response_ms: 0, + error: Some("timeout".into()), + }; + + insert_health_log(&db, 9_000, &status); + prune_health_log(&db, 0); + + let mut perms = std::fs::metadata(&db_path).unwrap().permissions(); + perms.set_mode(0o644); + std::fs::set_permissions(&db_path, perms).unwrap(); + let _ = std::fs::remove_dir_all(&dir); + } } From a95b34172759a11fed6b5161dcd01923c695c96f Mon Sep 17 00:00:00 2001 From: andriypolanski Date: Sat, 23 May 2026 06:25:34 -0400 Subject: [PATCH 2/2] style: format code with cargo fmt --- crates/genie-health/src/checker.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/crates/genie-health/src/checker.rs b/crates/genie-health/src/checker.rs index 50a625c..9e2526a 100644 --- a/crates/genie-health/src/checker.rs +++ b/crates/genie-health/src/checker.rs @@ -189,10 +189,7 @@ fn insert_health_log(db: &Connection, ts_ms: u64, status: &ServiceStatus) { } fn prune_health_log(db: &Connection, cutoff_ts_ms: u64) { - if let Err(e) = db.execute( - "DELETE FROM health_log WHERE ts_ms < ?1", - [cutoff_ts_ms], - ) { + if let Err(e) = db.execute("DELETE FROM health_log WHERE ts_ms < ?1", [cutoff_ts_ms]) { tracing::error!( cutoff_ts_ms, error = %e, @@ -408,10 +405,8 @@ mod tests { #[test] fn health_log_insert_and_prune_on_writable_db() { - let dir = std::env::temp_dir().join(format!( - "genie-health-writable-{}", - std::process::id() - )); + let dir = + std::env::temp_dir().join(format!("genie-health-writable-{}", std::process::id())); let _ = std::fs::remove_dir_all(&dir); std::fs::create_dir_all(&dir).unwrap(); @@ -446,7 +441,8 @@ mod tests { fn health_log_write_errors_do_not_panic_on_readonly_db() { use std::os::unix::fs::PermissionsExt; - let dir = std::env::temp_dir().join(format!("genie-health-readonly-{}", std::process::id())); + let dir = + std::env::temp_dir().join(format!("genie-health-readonly-{}", std::process::id())); let _ = std::fs::remove_dir_all(&dir); std::fs::create_dir_all(&dir).unwrap(); @@ -460,11 +456,8 @@ mod tests { perms.set_mode(0o444); std::fs::set_permissions(&db_path, perms).unwrap(); - let db = Connection::open_with_flags( - &db_path, - rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY, - ) - .unwrap(); + let db = Connection::open_with_flags(&db_path, rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY) + .unwrap(); let status = ServiceStatus { name: "core".into(),