Skip to content

Commit df7b1a3

Browse files
desiorac and claude committed
fix(failover): dynamic FAILOVER_MODE via state file — eliminates stale systemd env var
Replace the static os.environ FAILOVER_MODE read (set once at startup from systemd) with a dynamic read from brain/failover_state.json (cached for 10s). failover_monitor.py already manages this state file. Removes a class of bugs where FAILOVER_MODE=true persists across restarts. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9ffb6de commit df7b1a3

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

trust_layer/app.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,26 @@
1515
from fastapi.middleware.cors import CORSMiddleware
1616
from starlette.middleware.base import BaseHTTPMiddleware
1717

18-
# Failover read-only mode — actif si FAILOVER_MODE=true dans l'env systemd
19-
_FAILOVER_MODE = os.environ.get("FAILOVER_MODE", "").lower() == "true"
18+
# Failover read-only mode — read dynamically from failover_state.json
19+
# (replaces static systemd env var to avoid stale FAILOVER_MODE after restarts)
20+
_FAILOVER_STATE_FILE = "/opt/claude-ceo/brain/failover_state.json"
# "checked_at" holds a time.monotonic() reading. It is seeded with -inf rather
# than 0.0 because the monotonic clock's reference point is undefined: if the
# clock happens to read < 10 at the first call, a 0.0 seed would make the
# cache look fresh and the state file would not be consulted.
_failover_cache = {"value": False, "checked_at": float("-inf")}


def _is_failover_mode() -> bool:
    """Return True if the failover state file says ``mode == "failover"``.

    The result is cached in ``_failover_cache`` and reused for 10 seconds
    (measured with ``time.monotonic()``) so the file is not re-read on every
    call.

    Returns:
        True only when the state file is readable, contains a JSON object,
        and that object's ``"mode"`` entry equals ``"failover"``. A missing,
        unreadable, undecodable, malformed, or non-object file yields False.
        A False obtained this way is cached for 10 seconds like any other
        result.
    """
    import time

    now = time.monotonic()
    if now - _failover_cache["checked_at"] < 10:
        return _failover_cache["value"]

    try:
        with open(_FAILOVER_STATE_FILE, encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (both are subclasses).
        result = False
    else:
        # Valid JSON that is not an object (list, string, null, ...) has no
        # .get(); treat it as "not in failover" instead of raising.
        result = isinstance(state, dict) and state.get("mode") == "failover"

    _failover_cache["value"] = result
    _failover_cache["checked_at"] = now
    return result
2038

2139
# Endpoints d'écriture bloqués en mode failover
2240
_FAILOVER_BLOCKED_PATHS = {
@@ -57,7 +75,7 @@ async def dispatch(self, request: Request, call_next):
5775
class FailoverReadOnlyMiddleware(BaseHTTPMiddleware):
5876
"""Bloque les écritures en mode failover pour éviter le split-brain."""
5977
async def dispatch(self, request: Request, call_next):
60-
if _FAILOVER_MODE and request.method == "POST":
78+
if _is_failover_mode() and request.method == "POST":
6179
path = request.url.path.rstrip("/")
6280
if path in _FAILOVER_BLOCKED_PATHS:
6381
return JSONResponse(
@@ -394,9 +412,8 @@ async def lifespan(app):
394412
allow_credentials=False,
395413
)
396414

397-
if _FAILOVER_MODE:
398-
app.add_middleware(FailoverReadOnlyMiddleware)
399-
logger.warning("FAILOVER_MODE=true — service en lecture seule (POST /v1/proxy bloqué)")
415+
# Middleware always active — _is_failover_mode() checks state file dynamically
416+
app.add_middleware(FailoverReadOnlyMiddleware)
400417

401418
# --- Feature routers (v1.4+) ---
402419
from .routers.assess import router as _assess_router
@@ -1581,8 +1598,9 @@ async def health():
15811598
"timestamp": datetime.now(timezone.utc).isoformat(),
15821599
"environment": TRUST_LAYER_ENV,
15831600
}
1584-
resp["mode"] = "failover" if _FAILOVER_MODE else "primary"
1585-
resp["write_enabled"] = not _FAILOVER_MODE
1601+
failover = _is_failover_mode()
1602+
resp["mode"] = "failover" if failover else "primary"
1603+
resp["write_enabled"] = not failover
15861604
return resp
15871605

15881606

0 commit comments

Comments
 (0)