-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Add health check endpoints (#699) #774
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d79e477
a766f85
40703ee
a2a1af8
9b69ccd
9efc356
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| from pydantic import BaseModel, Field | ||
| from typing import Dict, List, Any, Optional | ||
| from datetime import datetime | ||
| import time | ||
| from enum import Enum | ||
| import uvicorn | ||
| import os | ||
|
|
@@ -165,6 +166,22 @@ class HealthResponse(BaseModel): | |
| timestamp: str | ||
| services: Dict[str, str] | ||
|
|
||
|
|
||
class DependencyHealth(BaseModel):
    """Individual dependency health status"""
    # Identifier of the dependency this result describes (e.g. "redis").
    name: str
    # Health state as a plain string; the checks visible in this file emit
    # "healthy" or "unhealthy".
    status: str
    # Duration of the check in milliseconds; 0.0 when nothing was measured.
    latency_ms: float = 0.0
    # Human-readable detail about the outcome; empty when there is none.
    message: str = ""
|
|
||
|
|
||
class HealthStatus(BaseModel):
    """Health check response model for readiness/liveness"""
    # Overall verdict for the probe. NOTE(review): the description advertises
    # "degraded", but the endpoints visible in this section only ever produce
    # "healthy" or "unhealthy" - confirm whether "degraded" is planned.
    status: str = Field(..., description="Overall health status: healthy, degraded, or unhealthy")
    # Time at which the probe was evaluated, as an ISO-8601 string.
    timestamp: str = Field(..., description="ISO timestamp of the health check")
    # Free-form mapping of check group -> result details; the shape differs
    # between the readiness and liveness endpoints.
    checks: Dict[str, Any] = Field(..., description="Individual check results")
|
|
||
|
|
||
| class ConversionRequest(BaseModel): | ||
| """Conversion request model""" | ||
| job_id: str = Field(..., description="Unique job identifier") | ||
|
|
@@ -240,6 +257,93 @@ async def health_check(): | |
| services=services | ||
| ) | ||
|
|
||
|
|
||
async def check_redis_health() -> DependencyHealth:
    """
    Check Redis connectivity and return health status.

    Sends a PING through the module-level ``redis_client`` and measures how
    long the round trip takes.

    Returns:
        DependencyHealth: always named ``"redis"``.
            * ``status="healthy"`` with the measured latency when the ping
              completes.
            * ``status="unhealthy"`` with ``latency_ms=0.0`` when the client
              has not been initialized.
            * ``status="unhealthy"`` with the time elapsed until the failure
              and the exception text in ``message`` when the ping raises.

    Exceptions raised by the check itself are caught, logged with their
    traceback, and converted into an unhealthy result.
    """
    # perf_counter() is monotonic, so the measured latency cannot go negative
    # or jump when the system wall clock is adjusted (time.time() can).
    started = time.perf_counter()

    try:
        if not redis_client:
            return DependencyHealth(
                name="redis",
                status="unhealthy",
                latency_ms=0.0,
                message="Redis client not initialized"
            )

        # Try a simple Redis operation
        await redis_client.ping()

        latency_ms = (time.perf_counter() - started) * 1000

        return DependencyHealth(
            name="redis",
            status="healthy",
            latency_ms=latency_ms,
            message="Redis connection successful"
        )
    except Exception as e:
        # Broad catch is deliberate: a health probe must report the failure
        # rather than crash the request.
        latency_ms = (time.perf_counter() - started) * 1000
        # exc_info=True keeps the traceback in the log so that probe failures
        # can be diagnosed in production.
        logger.error(f"Redis health check failed: {e}", exc_info=True)

        return DependencyHealth(
            name="redis",
            status="unhealthy",
            latency_ms=latency_ms,
            message=f"Redis connection failed: {str(e)}"
        )
|
|
||
|
|
||
@app.get("/health/readiness", response_model=HealthStatus, tags=["health"])
async def readiness_check():
    """
    Readiness probe - checks if the application can serve traffic.

    This endpoint verifies that all required dependencies (Redis) are available.
    The application should only receive traffic when this endpoint returns healthy.

    Returns:
        HealthStatus with HTTP 200 when every dependency check passes.
        The same JSON body with HTTP 503 when any dependency is unhealthy, so
        that probes which only inspect the status code also see the failure.
    """
    # Imported locally so this block does not rely on the module's import list.
    from fastapi.responses import JSONResponse

    # Dependency checks that gate readiness; currently only Redis.
    checks = [await check_redis_health()]

    # Ready only if no dependency reported "unhealthy".
    is_ready = all(c.status != "unhealthy" for c in checks)

    body = {
        "status": "healthy" if is_ready else "unhealthy",
        "timestamp": datetime.utcnow().isoformat(),
        "checks": {
            "dependencies": {
                c.name: {"status": c.status, "latency_ms": c.latency_ms, "message": c.message}
                for c in checks
            }
        },
    }

    if not is_ready:
        # A 200 response would tell status-code-based probes that the service
        # is ready; 503 reports it as not ready.
        return JSONResponse(status_code=503, content=body)

    return HealthStatus(**body)
|
|
||
|
|
||
@app.get("/health/liveness", response_model=HealthStatus, tags=["health"])
async def liveness_check():
    """
    Liveness probe - reports that the application process is up.

    No dependencies are consulted: being able to execute this handler at all
    is the signal. The response is always ``status="healthy"`` with a single
    ``"application"`` entry in ``checks``.
    """
    checked_at = datetime.utcnow().isoformat()
    application_check = {
        "status": "running",
        "message": "Application process is running",
    }
    return HealthStatus(
        status="healthy",
        timestamp=checked_at,
        checks={"application": application_check},
    )
|
Comment on lines
+299
to
+344
|
||
|
|
||
|
|
||
| @app.post("/api/v1/convert", response_model=ConversionResponse, tags=["conversion"]) | ||
| async def start_conversion( | ||
| request: ConversionRequest, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,4 +49,14 @@ pydantic-settings | |
| # Monitoring | ||
| prometheus-client | ||
| psutil | ||
| structlog>=24.0.0 | ||
| structlog>=24.0.0 | ||
|
|
||
| # Distributed Tracing (OpenTelemetry) | ||
| opentelemetry-api>=1.24.0 | ||
| opentelemetry-sdk>=1.24.0 | ||
| opentelemetry-exporter-otlp>=1.24.0 | ||
| # Note: opentelemetry-exporter-jaeger 1.21.0 is the latest version compatible with Python 3.11 | ||
| opentelemetry-exporter-jaeger==1.21.0 | ||
| opentelemetry-instrumentation-fastapi>=0.45b0 | ||
| opentelemetry-instrumentation-httpx>=0.45b0 | ||
| opentelemetry-instrumentation-redis>=0.45b0 | ||
|
Comment on lines
+54
to
+62
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Error logging here drops stack trace context (`exc_info=True`), while other error logs in this file include it. Log the exception with its stack trace to make health-check failures diagnosable in production.