diff --git a/.env.example b/.env.example index 153ba7d6..14a2ec99 100644 --- a/.env.example +++ b/.env.example @@ -75,6 +75,8 @@ MAX_TOKENS_PER_REQUEST=4000 # Monitoring and Analytics SENTRY_DSN=your-sentry-dsn-for-error-tracking +SENTRY_TRACES_SAMPLE_RATE=0.1 +VERSION=1.0.0 PROMETHEUS_ENABLED=true GRAFANA_ADMIN_USER=admin GRAFANA_ADMIN_PASSWORD=change-this-secure-grafana-password diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..7c9665ea --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,69 @@ +# Code Ownership Rules +# +# This CODEOWNERS file defines code ownership and review requirements for the ModPorter-AI project. +# Review is required from code owners before merging changes. +# +# For more information about CODEOWNERS, see: +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners + +# ============================================================================ +# Default Owners (catch-all for any file not matching specific rules) +# ============================================================================ +* @alex + +# ============================================================================ +# Frontend Team - React/TypeScript UI Components +# ============================================================================ +# All frontend-related files require review from frontend maintainers +/frontend/ @alex + +# ============================================================================ +# Backend Team - Python API and Server +# ============================================================================ +# All backend-related files require review from backend maintainers +/backend/ @alex + +# ============================================================================ +# AI-Engine Team - ML/AI Components +# ============================================================================ +# All AI engine-related files require review from 
AI engine maintainers +/ai-engine/ @alex + +# ============================================================================ +# Infrastructure & DevOps +# ============================================================================ +# Docker and infrastructure configurations +/docker/ @alex +docker-compose*.yml @alex +Dockerfile* @alex + +# ============================================================================ +# Security & Compliance +# ============================================================================ +# Security-related files require review from security team +/.github/security-check.sh @alex +/.github/security-config-guide.md @alex + +# ============================================================================ +# Documentation +# ============================================================================ +# Documentation changes can be reviewed by any maintainer +/docs/ @alex +*.md @alex +!/.github/*.md + +# ============================================================================ +# Configuration Files +# ============================================================================ +# Project-wide configuration files +/.github/ @alex +/database/ @alex +/monitoring/ @alex +/scripts/ @alex +/modporter/ @alex +/tests/ @alex + +# ============================================================================ +# CI/CD Workflows +# ============================================================================ +/.github/workflows/ @alex diff --git a/README.md b/README.md index d47af974..0ba111d2 100644 --- a/README.md +++ b/README.md @@ -178,8 +178,14 @@ All services include health checks for monitoring: # Check frontend health curl http://localhost:3000/health -# Check backend health -curl http://localhost:8080/api/v1/health +# Check backend health (basic liveness) +curl http://localhost:8080/health + +# Check backend readiness (includes dependency checks) +curl http://localhost:8080/health/readiness + +# Check backend liveness (process running) +curl 
http://localhost:8080/health/liveness # Check AI engine health curl http://localhost:8001/api/v1/health @@ -188,6 +194,43 @@ curl http://localhost:8001/api/v1/health docker compose ps ``` +### Health Check Endpoints + +The backend provides three health check endpoints for Kubernetes probes: + +| Endpoint | Purpose | Dependencies Checked | +|----------|---------|---------------------| +| `/health` | Basic health check | None | +| `/health/liveness` | Process is running | None | +| `/health/readiness` | Can serve traffic | Database, Redis | + +**Response Format:** +```json +{ + "status": "healthy", + "timestamp": "2024-01-01T00:00:00", + "checks": { + "dependencies": { + "database": { + "status": "healthy", + "latency_ms": 5.2, + "message": "Database connection successful" + }, + "redis": { + "status": "healthy", + "latency_ms": 1.8, + "message": "Redis connection successful" + } + } + } +} +``` + +**Status Values:** +- `healthy`: All checks passed +- `degraded`: Non-critical dependencies unavailable (e.g., Redis) +- `unhealthy`: Critical dependencies unavailable (e.g., Database) + ### Troubleshooting #### Common Issues diff --git a/ai-engine/main.py b/ai-engine/main.py index a6ebc06b..f20a88bb 100644 --- a/ai-engine/main.py +++ b/ai-engine/main.py @@ -16,13 +16,20 @@ import redis.asyncio as aioredis # Configure logging using centralized configuration -from utils.logging_config import setup_logging, get_agent_logger +from utils.logging_config import setup_logging, get_agent_logger, configure_structlog # Load environment variables load_dotenv() # Setup logging with environment-based configuration debug_mode = os.getenv("DEBUG", "false").lower() == "true" + +# Also configure structlog for structured JSON logging in production +configure_structlog( + debug_mode=debug_mode, + json_format=os.getenv("LOG_JSON_FORMAT", "false").lower() == "true" +) + setup_logging( debug_mode=debug_mode, enable_file_logging=os.getenv("ENABLE_FILE_LOGGING", "true").lower() == "true" diff 
from contextvars import ContextVar  # the module-level annotation below needs this name

# Context variable holding the correlation ID of the current request/task.
correlation_id_var: ContextVar[Optional[str]] = ContextVar("correlation_id", default=None)


def _resolve_log_level(log_level: Optional[str]) -> int:
    """Translate a level name (any case) into a numeric logging level.

    Falls back to ``LOG_LEVEL`` from the environment when *log_level* is
    ``None`` and to ``logging.INFO`` when the name is not a real level.
    """
    name = (log_level or os.getenv("LOG_LEVEL", "INFO")).upper()
    level = getattr(logging, name, None)
    # getattr can hand back functions/strings for names such as "BASIC_FORMAT".
    return level if isinstance(level, int) else logging.INFO


def configure_structlog(
    log_level: Optional[str] = None,
    log_file: Optional[str] = None,
    json_format: Optional[bool] = None,
    debug_mode: bool = False,
):
    """
    Configure structlog and the standard-library root logger for the AI engine.

    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR). Defaults
            to the ``LOG_LEVEL`` environment variable, then INFO.
        log_file: Path to the log file. Defaults to ``$LOG_DIR/ai-engine.log``.
        json_format: Use JSON output. When ``None`` it is enabled by
            ``LOG_JSON_FORMAT=true`` or ``ENVIRONMENT=production``.
        debug_mode: Force the colourised console renderer.

    Returns:
        A structlog logger obtained after configuration.
    """
    level = _resolve_log_level(log_level)

    # Auto-detect JSON format in production
    if json_format is None:
        json_format = (
            os.getenv("LOG_JSON_FORMAT", "false").lower() == "true"
            or os.getenv("ENVIRONMENT", "development") == "production"
        )

    processors = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # The renderer MUST be the last processor: it turns the event dict into a
    # string, and any dict-based processor placed after it would fail.
    if debug_mode:
        # ConsoleRenderer formats exc_info itself.
        processors.append(structlog.dev.ConsoleRenderer())
    elif json_format:
        processors.append(structlog.processors.format_exc_info)
        processors.append(structlog.processors.JSONRenderer())
    else:
        processors.append(structlog.dev.ConsoleRenderer(colors=False))

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # Also configure standard library logging; existing handlers are replaced.
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.handlers.clear()

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    ))
    root_logger.addHandler(console_handler)

    # File logging is best-effort: an unwritable directory must not stop the
    # service from starting, so fall back to console-only output.
    try:
        if log_file is None:
            log_dir = os.getenv("LOG_DIR", "/tmp/modporter-ai/logs")
            os.makedirs(log_dir, exist_ok=True)
            log_file = os.path.join(log_dir, "ai-engine.log")
        file_handler = logging.handlers.RotatingFileHandler(
            log_file,
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=5,
            encoding='utf-8'
        )
    except OSError as exc:
        root_logger.warning("File logging disabled (%s): %s", log_file, exc)
    else:
        file_handler.setLevel(logging.INFO)
        # structlog already rendered the line; write it verbatim.
        file_handler.setFormatter(logging.Formatter("%(message)s"))
        root_logger.addHandler(file_handler)

    return structlog.get_logger()


def get_structlog_logger(name: str = None) -> structlog.BoundLogger:
    """
    Get a structlog logger instance.

    Args:
        name: Logger name (optional)

    Returns:
        Configured structlog logger
    """
    if name:
        return structlog.get_logger(name)
    return structlog.get_logger()


def set_correlation_id(correlation_id: Optional[str] = None) -> str:
    """
    Set the correlation ID for the current context.

    Note: this resets every structlog context variable before binding the ID.

    Args:
        correlation_id: Optional correlation ID to use; a UUID4 is generated
            when omitted.

    Returns:
        The correlation ID (either provided or generated)
    """
    if correlation_id is None:
        correlation_id = str(uuid.uuid4())

    correlation_id_var.set(correlation_id)
    structlog.contextvars.clear_contextvars()
    structlog.contextvars.bind_contextvars(correlation_id=correlation_id)
    return correlation_id


def get_correlation_id() -> Optional[str]:
    """
    Get the current correlation ID from the context.

    Returns:
        Current correlation ID or None
    """
    return correlation_id_var.get()


def clear_correlation_id() -> None:
    """
    Clear the correlation ID (and all structlog context variables) from the
    current context.
    """
    correlation_id_var.set(None)
    structlog.contextvars.clear_contextvars()
# Sentry error tracking initialization
import math


def parse_traces_sample_rate(raw, default=0.1):
    """Parse a Sentry traces sample rate from an environment string.

    Args:
        raw: Raw value (usually from ``SENTRY_TRACES_SAMPLE_RATE``); may be
            ``None``, blank or malformed.
        default: Rate used when *raw* cannot be parsed.

    Returns:
        A float clamped to the inclusive range [0.0, 1.0].
    """
    try:
        rate = float(raw)
    except (TypeError, ValueError):
        # A blank or garbage value must not crash the application at import.
        return default
    if math.isnan(rate):
        return default
    return min(1.0, max(0.0, rate))


def filter_sentry_event(event, hint):
    """Drop events whose hint carries an ``ignore`` key; pass the rest through."""
    return None if "ignore" in hint else event


SENTRY_DSN = os.getenv("SENTRY_DSN")
if SENTRY_DSN:
    # Imported here so the SDK is only loaded when error tracking is enabled.
    import sentry_sdk
    from sentry_sdk.integrations.fastapi import FastApiIntegration
    from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration

    try:
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            integrations=[
                FastApiIntegration(),
                SqlalchemyIntegration(),
            ],
            # Fraction of transactions traced (1.0 would capture 100%).
            traces_sample_rate=parse_traces_sample_rate(
                os.getenv("SENTRY_TRACES_SAMPLE_RATE", "0.1")
            ),
            # Include environment and release info
            environment=os.getenv("ENVIRONMENT", "development"),
            release=os.getenv("VERSION", "1.0.0"),
            # Do not attach personally identifiable information to events
            send_default_pii=False,
            # Filter out events explicitly marked to be ignored
            before_send=filter_sentry_event,
        )
    except ValueError as exc:
        # An invalid DSN (e.g. the placeholder from .env.example) is reported
        # by the SDK as a ValueError subclass; keep the app bootable.
        print(f"Sentry error tracking disabled: invalid SENTRY_DSN ({type(exc).__name__})")
    else:
        print(f"Sentry error tracking initialized for environment: {os.getenv('ENVIRONMENT', 'development')}")
"""
Health check endpoints for Kubernetes readiness and liveness probes.

This module provides:
- /health/readiness: Checks if the application can serve traffic (dependencies available)
- /health/liveness: Checks if the application is running and doesn't need to be restarted

Issue #699: Add health check endpoints
Readiness Pillar: Debugging & Observability
"""

import asyncio
import time
from datetime import datetime, timezone
from typing import Dict, Any, List
from fastapi import APIRouter, Depends, Response
from pydantic import BaseModel, Field
import logging

from db.base import async_engine
from services.cache import CacheService

logger = logging.getLogger(__name__)

router = APIRouter(tags=["health"])


class HealthStatus(BaseModel):
    """Health check response model"""

    status: str = Field(..., description="Overall health status: healthy, degraded, or unhealthy")
    timestamp: str = Field(..., description="ISO timestamp of the health check")
    checks: Dict[str, Any] = Field(..., description="Individual check results")


class DependencyHealth(BaseModel):
    """Individual dependency health status"""

    name: str
    status: str
    latency_ms: float = 0.0
    message: str = ""


# Cache service instance used for the Redis probe
cache = CacheService()


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Same output format as ``datetime.utcnow().isoformat()`` without relying
    on the deprecated ``utcnow``.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


def _elapsed_ms(started: float) -> float:
    """Milliseconds elapsed since a ``time.perf_counter()`` reading."""
    return (time.perf_counter() - started) * 1000


async def check_database_health() -> DependencyHealth:
    """
    Check database connectivity by running ``SELECT 1``.

    Returns:
        DependencyHealth named "database"; never raises. Failure details go
        to the log, only the exception type is exposed in the response.
    """
    started = time.perf_counter()

    try:
        from sqlalchemy import text

        async with async_engine.connect() as conn:
            result = await conn.execute(text("SELECT 1"))
            result.fetchone()
    except Exception as exc:  # probe boundary: report, never propagate
        logger.error("Database health check failed: %s", exc)
        return DependencyHealth(
            name="database",
            status="unhealthy",
            latency_ms=_elapsed_ms(started),
            # Driver errors can contain hosts/credentials; keep them out of
            # an unauthenticated endpoint.
            message=f"Database connection failed: {type(exc).__name__}"
        )

    return DependencyHealth(
        name="database",
        status="healthy",
        latency_ms=_elapsed_ms(started),
        message="Database connection successful"
    )


async def check_redis_health() -> DependencyHealth:
    """
    Check Redis connectivity through the cache service with a ``PING``.

    Returns:
        DependencyHealth named "redis"; never raises.
    """
    started = time.perf_counter()

    try:
        # Check if Redis is available through cache service
        if not cache._redis_available or cache._redis_disabled:
            return DependencyHealth(
                name="redis",
                status="unhealthy",
                latency_ms=0.0,
                message="Redis is not available or disabled"
            )

        # Try a simple Redis operation
        await cache._client.ping()
    except Exception as exc:  # probe boundary: report, never propagate
        logger.error("Redis health check failed: %s", exc)
        return DependencyHealth(
            name="redis",
            status="unhealthy",
            latency_ms=_elapsed_ms(started),
            message=f"Redis connection failed: {type(exc).__name__}"
        )

    return DependencyHealth(
        name="redis",
        status="healthy",
        latency_ms=_elapsed_ms(started),
        message="Redis connection successful"
    )


@router.get("/health/readiness", response_model=HealthStatus)
async def readiness_check(response: Response = None):
    """
    Readiness probe - checks if the application can serve traffic.

    The database is required; Redis is optional. Overall status is
    "unhealthy" when the database check fails, "degraded" when only Redis
    fails, and "healthy" otherwise. An "unhealthy" result is returned with
    HTTP 503, because HTTP probes judge success by status code, not body.

    Args:
        response: Response object injected by FastAPI; optional so the
            function can still be called directly.

    Returns:
        HealthStatus with detailed dependency information
    """
    # The two probes are independent, so run them concurrently.
    db_health, redis_health = await asyncio.gather(
        check_database_health(),
        check_redis_health(),
    )
    checks: List[DependencyHealth] = [db_health, redis_health]

    if db_health.status == "unhealthy":
        overall = "unhealthy"
    elif any(c.status == "unhealthy" for c in checks):
        # Redis is optional - degraded status
        overall = "degraded"
    else:
        overall = "healthy"

    if overall == "unhealthy" and response is not None:
        response.status_code = 503

    return HealthStatus(
        status=overall,
        timestamp=_utc_timestamp(),
        checks={
            "dependencies": {
                c.name: {
                    "status": c.status,
                    "latency_ms": c.latency_ms,
                    "message": c.message
                }
                for c in checks
            }
        }
    )


@router.get("/health/liveness", response_model=HealthStatus)
async def liveness_check():
    """
    Liveness probe - reports that the application process is running.

    Deliberately performs no dependency checks, so a failing database or
    Redis cannot trigger container restart loops.

    Returns:
        HealthStatus indicating the application is running
    """
    return HealthStatus(
        status="healthy",
        timestamp=_utc_timestamp(),
        checks={
            "application": {
                "status": "running",
                "message": "Application process is running"
            }
        }
    )


@router.get("/health", response_model=HealthStatus)
async def basic_health_check():
    """
    Basic health check endpoint (alias for liveness).

    Returns:
        HealthStatus with basic health information
    """
    return await liveness_check()
"""
Logging Middleware for ModPorter AI Backend
Provides request/response logging with structured logging.

Issue: #695 - Add structured logging
"""

import time
import uuid
from typing import Callable, Optional
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.types import ASGIApp
import structlog

from services.structured_logging import (
    clear_correlation_id,
    clear_request_metadata,
    get_correlation_id,
    set_correlation_id,
    set_request_metadata,
)

logger = structlog.get_logger(__name__)


class LoggingMiddleware(BaseHTTPMiddleware):
    """
    Middleware for logging HTTP requests and responses with correlation IDs.

    Features:
    - Reuses an existing/incoming correlation ID, otherwise generates one
    - Request/response timing
    - Structured logging of HTTP method, path, status code
    - Correlation and request IDs echoed in the response headers
    """

    def __init__(self, app: ASGIApp, exclude_paths: Optional[list] = None):
        """
        Initialize the logging middleware.

        Args:
            app: The ASGI application
            exclude_paths: List of paths to exclude from logging (e.g., health checks)
        """
        super().__init__(app)
        self.exclude_paths = exclude_paths or [
            "/api/v1/metrics",
            "/health",
            "/metrics",
            "/docs",
            "/redoc",
            "/openapi.json",
        ]

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Log the start, completion or failure of a request.

        Args:
            request: Incoming request.
            call_next: Callable invoking the rest of the application.

        Returns:
            The downstream response with ``X-Correlation-ID`` and
            ``X-Request-ID`` headers added.

        Raises:
            Exception: Anything raised downstream is logged and re-raised.
        """
        if self._should_exclude(request.url.path):
            return await call_next(request)

        # Prefer an ID already bound for this request, then the caller's
        # header; only generate a fresh one when neither is present.
        correlation_id = get_correlation_id() or set_correlation_id(
            request.headers.get("X-Correlation-ID") or None
        )

        started = time.perf_counter()
        request_id = str(uuid.uuid4())

        log = logger.bind(
            correlation_id=correlation_id,
            request_id=request_id,
            method=request.method,
            path=request.url.path,
            query_params=str(request.query_params) if request.query_params else None,
            client_host=request.client.host if request.client else None,
        )

        # NOTE: structlog's first positional argument is already named
        # ``event``; passing event=... as a keyword raises TypeError.
        log.info("request_started")

        try:
            response = await call_next(request)

            duration_ms = round((time.perf_counter() - started) * 1000, 2)
            log.info(
                "request_completed",
                status_code=response.status_code,
                duration_ms=duration_ms,
            )

            response.headers["X-Correlation-ID"] = correlation_id
            response.headers["X-Request-ID"] = request_id

            return response

        except Exception as e:
            duration_ms = round((time.perf_counter() - started) * 1000, 2)
            log.error(
                "request_failed",
                error=str(e),
                duration_ms=duration_ms,
                exc_info=e,
            )
            raise

        finally:
            # Clear correlation ID
            clear_correlation_id()

    def _should_exclude(self, path: str) -> bool:
        """Check if the path should be excluded from logging.

        A path is excluded when it equals an entry of ``exclude_paths`` or
        lies beneath it, so "/health" covers "/health/readiness" but not an
        unrelated route such as "/healthcare".
        """
        return any(
            path == prefix or path.startswith(prefix.rstrip("/") + "/")
            for prefix in self.exclude_paths
        )


class RequestContextMiddleware(BaseHTTPMiddleware):
    """
    Middleware for setting up request context variables.

    Adopts the caller's ``X-Correlation-ID`` when one is supplied and stores
    method, path and client host as request metadata for the duration of the
    request.
    """

    def __init__(self, app: ASGIApp):
        super().__init__(app)

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Set up request context and process the request."""

        # Get correlation ID from header if present
        correlation_id = request.headers.get("X-Correlation-ID")
        if correlation_id:
            set_correlation_id(correlation_id)

        # Add request metadata
        set_request_metadata({
            "method": request.method,
            "path": request.url.path,
            "client_host": request.client.host if request.client else None,
        })

        try:
            return await call_next(request)
        finally:
            clear_request_metadata()
def configure_structlog(
    log_level: Optional[str] = None,
    log_file: Optional[str] = None,
    json_format: Optional[bool] = None,
    debug_mode: bool = False,
):
    """
    Configure structlog and the standard-library root logger for the backend.

    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR). Defaults
            to the ``LOG_LEVEL`` environment variable, then INFO.
        log_file: Path to the log file. Defaults to ``$LOG_DIR/modporter.log``.
        json_format: Use JSON format. When ``None`` it is enabled by
            ``LOG_JSON_FORMAT=true`` or ``ENVIRONMENT=production``.
        debug_mode: Enable debug mode for verbose, colourised console output.

    Returns:
        A structlog logger obtained after configuration.
    """
    if log_level is None:
        log_level = os.getenv("LOG_LEVEL", "INFO")
    level = getattr(logging, log_level.upper(), None)
    if not isinstance(level, int):
        # Unknown names (or non-level attributes of ``logging``) fall back to INFO.
        level = logging.INFO

    # Auto-detect JSON format; production always logs JSON.
    if json_format is None:
        json_format = (
            os.getenv("LOG_JSON_FORMAT", "false").lower() == "true"
            or os.getenv("ENVIRONMENT", "development") == "production"
        )

    processors = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    # The renderer MUST come last: it converts the event dict to a string and
    # any dict-based processor after it would fail on every log call.
    if debug_mode:
        # ConsoleRenderer formats exc_info itself.
        processors.append(structlog.dev.ConsoleRenderer())
    elif json_format:
        processors.append(structlog.processors.format_exc_info)
        processors.append(JSONRenderer())
    else:
        processors.append(structlog.dev.ConsoleRenderer(colors=False))

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # Also configure standard library logging; existing handlers are replaced.
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.handlers.clear()

    console_handler = logging.StreamHandler(sys.stdout)
    if json_format:
        # json_format must be passed explicitly; the formatter defaults to plain text.
        console_handler.setFormatter(
            LoggingFormatter(json_format=True, debug_mode=debug_mode)
        )
    else:
        console_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                datefmt="%Y-%m-%d %H:%M:%S"
            )
        )
    root_logger.addHandler(console_handler)

    # File logging is best-effort: an unwritable location such as the default
    # /var/log/modporter must not prevent the application from starting.
    try:
        if log_file is None:
            log_dir = os.getenv("LOG_DIR", "/var/log/modporter")
            os.makedirs(log_dir, exist_ok=True)
            log_file = os.path.join(log_dir, "modporter.log")
        file_handler = RotatingFileHandler(
            log_file,
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=5,
        )
    except OSError as exc:
        root_logger.warning("File logging disabled (%s): %s", log_file, exc)
    else:
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(LoggingFormatter(json_format=True))
        root_logger.addHandler(file_handler)

    return structlog.get_logger()
record.funcName, "line": record.lineno, } - + # Add correlation ID if available correlation_id = correlation_id_var.get() if correlation_id: log_data["correlation_id"] = correlation_id - + # Add request metadata if available metadata = request_metadata_var.get() if metadata: log_data["request"] = metadata - + # Add exception info if present if record.exc_info: log_data["exception"] = self.formatException(record.exc_info) - + # Add extra fields from the record if hasattr(record, "extra_data"): log_data.update(record.extra_data) - + # Add performance metrics if available if hasattr(record, "duration_ms"): log_data["duration_ms"] = record.duration_ms - - return json.dumps(log_data) + + if self.json_format: + import json + return json.dumps(log_data) + else: + # Plain text format + corr_str = f"[{correlation_id[:8]}...] " if correlation_id else "" + return f"{log_data['timestamp']} {record.levelname} {corr_str}{record.getMessage()}" -class PlainFormatter(logging.Formatter): - """ - Human-readable formatter for development. +def get_logger(name: str) -> structlog.BoundLogger: """ + Get a configured structlog logger instance. - def format(self, record: logging.LogRecord) -> str: - correlation_id = correlation_id_var.get() - corr_str = f"[{correlation_id[:8]}...] " if correlation_id else "" - return f"{self.formatTime(record)} {record.levelname} {corr_str}{record.getMessage()}" + Args: + name: The name for the logger (typically __name__) + + Returns: + Configured structlog logger instance + """ + return structlog.get_logger(name) -def get_logger(name: str) -> logging.Logger: +def get_standard_logger(name: str) -> logging.Logger: """ - Get a configured logger instance. + Get a standard library logger configured to work with structlog. 
Args: name: The name for the logger (typically __name__) Returns: - Configured logger instance + Configured standard logger instance """ logger = logging.getLogger(name) @@ -97,9 +206,9 @@ def get_logger(name: str) -> logging.Logger: console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) - # Use plain formatter for console in development - console_formatter = PlainFormatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" + console_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" ) console_handler.setFormatter(console_formatter) logger.addHandler(console_handler) @@ -114,7 +223,7 @@ def get_logger(name: str) -> logging.Logger: backupCount=5, ) file_handler.setLevel(logging.INFO) - file_handler.setFormatter(StructuredFormatter()) + file_handler.setFormatter(LoggingFormatter(json_format=True)) logger.addHandler(file_handler) return logger @@ -135,6 +244,8 @@ def set_correlation_id(correlation_id: Optional[str] = None) -> str: correlation_id = str(uuid.uuid4()) correlation_id_var.set(correlation_id) + structlog.contextvars.clear_contextvars() + structlog.contextvars.bind_contextvars(correlation_id=correlation_id) return correlation_id @@ -163,6 +274,7 @@ def set_request_metadata(metadata: Dict[str, Any]) -> None: metadata: Dictionary of metadata to store """ request_metadata_var.set(metadata) + structlog.contextvars.bind_contextvars(**metadata) def clear_request_metadata() -> None: @@ -193,6 +305,13 @@ def __enter__(self): correlation_id_var.set(self.correlation_id) request_metadata_var.set(self.metadata) + + # Bind to structlog context + structlog.contextvars.clear_contextvars() + structlog.contextvars.bind_contextvars( + correlation_id=self.correlation_id, + **self.metadata + ) return self @@ -202,7 +321,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): def log_api_request( - logger: logging.Logger, + logger: 
structlog.BoundLogger, method: str, path: str, status_code: Optional[int] = None, @@ -234,13 +353,15 @@ def log_api_request( log_data.update(extra_fields) - # Create a custom log record with extra data - extra = {"extra_data": log_data} - logger.info(f"{method} {path}", extra=extra) + logger.info(f"{method} {path}", **log_data) def log_conversion_event( - logger: logging.Logger, job_id: str, event: str, progress: Optional[int] = None, **extra_fields + logger: structlog.BoundLogger, + job_id: str, + event: str, + progress: Optional[int] = None, + **extra_fields ) -> None: """ Log a conversion event with structured data. @@ -263,12 +384,11 @@ def log_conversion_event( log_data.update(extra_fields) - extra = {"extra_data": log_data} - logger.info(f"Conversion {job_id}: {event}", extra=extra) + logger.info(f"Conversion {job_id}: {event}", **log_data) def log_error_with_context( - logger: logging.Logger, + logger: structlog.BoundLogger, error: Exception, context: Optional[Dict[str, Any]] = None, **extra_fields, @@ -293,24 +413,29 @@ def log_error_with_context( log_data.update(extra_fields) - extra = {"extra_data": log_data} - logger.error(str(error), exc_info=True, extra=extra) + logger.error(str(error), exc_info=error, **log_data) + + +# Module-level logger with lazy initialization +def _get_module_logger() -> structlog.BoundLogger: + """Get the module-level structlog logger.""" + return structlog.get_logger(__name__) -# Module-level logger with lazy initialization to avoid import errors in test environments -class _LazyLogger: +# Lazy logger proxy +class _LazyStructlogLogger: """Lazy proxy for the default logger that defers initialization until first access.""" _instance = None def __getattr__(self, name): if self._instance is None: - self._instance = get_logger(__name__) + self._instance = _get_module_logger() return getattr(self._instance, name) def __call__(self, *args, **kwargs): if self._instance is None: - self._instance = get_logger(__name__) + self._instance = 
_get_module_logger() return self._instance(*args, **kwargs) def __repr__(self): @@ -324,4 +449,4 @@ def __str__(self): return str(self._instance) -logger = _LazyLogger() +logger = _LazyStructlogLogger() diff --git a/frontend/.env.example b/frontend/.env.example index 7b5b11c1..7bab065a 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -8,3 +8,16 @@ VITE_API_BASE_URL= # Alternative: Set the base URL without /api/v1 (will be derived) # VITE_API_URL=http://localhost:8000/api/v1 + +# Sentry Error Tracking Configuration +# Get your DSN from https://sentry.io/settings//projects//keys/ +VITE_SENTRY_DSN= + +# Sentry sampling rates (0.0 - 1.0) +VITE_SENTRY_TRACES_SAMPLE_RATE=0.1 + +# App version (optional, defaults to 1.0.0) +VITE_APP_VERSION=1.0.0 + +# Enable Sentry in development (optional, for debugging) +# VITE_SENTRY_ENABLE_DEV=true diff --git a/frontend/package.json b/frontend/package.json index a5ecf20e..078f1fb7 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -30,6 +30,7 @@ "@monaco-editor/react": "^4.7.0", "@mui/icons-material": "^7.3.8", "@mui/material": "^7.3.5", + "@sentry/react": "^8.0.0", "@tanstack/react-query": "^5.90.21", "axios": "^1.13.6", "date-fns": "^4.1.0", diff --git a/frontend/src/components/ErrorBoundary/ErrorBoundary.tsx b/frontend/src/components/ErrorBoundary/ErrorBoundary.tsx index 38c1bb5e..1e3d5ab3 100644 --- a/frontend/src/components/ErrorBoundary/ErrorBoundary.tsx +++ b/frontend/src/components/ErrorBoundary/ErrorBoundary.tsx @@ -1,9 +1,11 @@ /** * Error Boundary Component - Day 5 Enhancement * Catches JavaScript errors and provides user-friendly error handling + * Integrated with Sentry for production error tracking */ import React, { Component, ErrorInfo, ReactNode } from 'react'; +import * as Sentry from '@sentry/react'; import './ErrorBoundary.css'; interface Props { @@ -40,6 +42,13 @@ export class ErrorBoundary extends Component { // You can also log the error to an error reporting service 
console.error('Error caught by boundary:', error, errorInfo); + // Capture error with Sentry for production monitoring + Sentry.captureException(error, { + extra: { + componentStack: errorInfo.componentStack, + }, + }); + this.setState({ error, errorInfo, @@ -205,6 +214,8 @@ export const useErrorHandler = () => { const handleError = React.useCallback((error: Error) => { console.error('Error caught by hook:', error); + // Capture error with Sentry + Sentry.captureException(error); setError(error); }, []); diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx index 62ea64a7..d6c41e6c 100644 --- a/frontend/src/main.tsx +++ b/frontend/src/main.tsx @@ -3,6 +3,41 @@ import { createRoot } from 'react-dom/client'; import './styles/variables.css'; import './index.css'; import App from './App.tsx'; +import * as Sentry from '@sentry/react'; + +// Initialize Sentry for error tracking +// Only initialize if SENTRY_DSN is provided in environment +const sentryDsn = import.meta.env.VITE_SENTRY_DSN; +if (sentryDsn) { + Sentry.init({ + dsn: sentryDsn, + environment: import.meta.env.MODE, + release: `modporter-ai-frontend@${import.meta.env.VITE_APP_VERSION || '1.0.0'}`, + integrations: [ + Sentry.browserTracingIntegration(), + Sentry.replayIntegration({ + maskAllText: false, + blockAllMedia: false, + }), + ], + // Performance monitoring + tracesSampleRate: parseFloat(import.meta.env.VITE_SENTRY_TRACES_SAMPLE_RATE || '0.1'), + // Session replay + replaysSessionSampleRate: 0.1, + replaysOnErrorSampleRate: 1.0, + // Filter events + beforeSend(event) { + // Don't send events in development unless explicitly enabled + if (import.meta.env.MODE === 'development' && !import.meta.env.VITE_SENTRY_ENABLE_DEV) { + return null; + } + return event; + }, + }); + console.log('Sentry error tracking initialized'); +} + +Sentry.addCaptureConsoleIntegration(); createRoot(document.getElementById('root')!).render(