|
15 | 15 | from fastapi.middleware.cors import CORSMiddleware |
16 | 16 | from starlette.middleware.base import BaseHTTPMiddleware |
17 | 17 |
|
18 | | -# Failover read-only mode — actif si FAILOVER_MODE=true dans l'env systemd |
19 | | -_FAILOVER_MODE = os.environ.get("FAILOVER_MODE", "").lower() == "true" |
# Failover read-only mode — read dynamically from failover_state.json
# (replaces the static systemd env var, which could leave a stale
# FAILOVER_MODE value after restarts).
_FAILOVER_STATE_FILE = "/opt/claude-ceo/brain/failover_state.json"
# How long a state-file read stays valid before the file is re-read.
_FAILOVER_CACHE_TTL_SECONDS = 10.0
# "checked_at" starts at -inf so the very first call always reads the file:
# time.monotonic() has an undefined reference point and may itself be
# smaller than the TTL (e.g. shortly after boot).
_failover_cache = {"value": False, "checked_at": float("-inf")}


def _is_failover_mode() -> bool:
    """Return True if failover_state.json says mode == "failover".

    The answer is cached in ``_failover_cache`` for
    ``_FAILOVER_CACHE_TTL_SECONDS`` so that callers on the request path do
    not hit the filesystem on every call.

    Returns:
        True only when the state file exists, parses as a JSON object and
        its ``"mode"`` key equals ``"failover"``. A missing, unreadable,
        undecodable, malformed or non-object state file yields False.
    """
    import time

    now = time.monotonic()
    if now - _failover_cache["checked_at"] < _FAILOVER_CACHE_TTL_SECONDS:
        return _failover_cache["value"]

    try:
        with open(_FAILOVER_STATE_FILE, encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        state = None

    # Valid JSON is not necessarily an object (could be a list, string, ...);
    # calling .get() on those would raise AttributeError on every request.
    result = isinstance(state, dict) and state.get("mode") == "failover"

    _failover_cache["value"] = result
    _failover_cache["checked_at"] = now
    return result
20 | 38 |
|
21 | 39 | # Endpoints d'écriture bloqués en mode failover |
22 | 40 | _FAILOVER_BLOCKED_PATHS = { |
@@ -57,7 +75,7 @@ async def dispatch(self, request: Request, call_next): |
57 | 75 | class FailoverReadOnlyMiddleware(BaseHTTPMiddleware): |
58 | 76 | """Bloque les écritures en mode failover pour éviter le split-brain.""" |
59 | 77 | async def dispatch(self, request: Request, call_next): |
60 | | - if _FAILOVER_MODE and request.method == "POST": |
| 78 | + if _is_failover_mode() and request.method == "POST": |
61 | 79 | path = request.url.path.rstrip("/") |
62 | 80 | if path in _FAILOVER_BLOCKED_PATHS: |
63 | 81 | return JSONResponse( |
@@ -394,9 +412,8 @@ async def lifespan(app): |
394 | 412 | allow_credentials=False, |
395 | 413 | ) |
396 | 414 |
|
397 | | -if _FAILOVER_MODE: |
398 | | - app.add_middleware(FailoverReadOnlyMiddleware) |
399 | | - logger.warning("FAILOVER_MODE=true — service en lecture seule (POST /v1/proxy bloqué)") |
| 415 | +# Middleware always active — _is_failover_mode() checks state file dynamically |
| 416 | +app.add_middleware(FailoverReadOnlyMiddleware) |
400 | 417 |
|
401 | 418 | # --- Feature routers (v1.4+) --- |
402 | 419 | from .routers.assess import router as _assess_router |
@@ -1581,8 +1598,9 @@ async def health(): |
1581 | 1598 | "timestamp": datetime.now(timezone.utc).isoformat(), |
1582 | 1599 | "environment": TRUST_LAYER_ENV, |
1583 | 1600 | } |
1584 | | - resp["mode"] = "failover" if _FAILOVER_MODE else "primary" |
1585 | | - resp["write_enabled"] = not _FAILOVER_MODE |
| 1601 | + failover = _is_failover_mode() |
| 1602 | + resp["mode"] = "failover" if failover else "primary" |
| 1603 | + resp["write_enabled"] = not failover |
1586 | 1604 | return resp |
1587 | 1605 |
|
1588 | 1606 |
|
|
0 commit comments