
Commit

Merge pull request #712 from roboflow/fix-prometheus
Fix prometheus scraping
PawelPeczek-Roboflow authored Oct 3, 2024
2 parents 7e27548 + 74ccab7 commit 9b65794
Showing 16 changed files with 53 additions and 37 deletions.
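
In short, this commit moves the Prometheus wiring out of the individual docker config scripts and into the shared HTTP API: the flag is parsed once in inference/core/env.py and the /metrics endpoint is exposed from http_api.py. A condensed sketch of the resulting flow, pieced together from the hunks below (the str2bool helper is simplified here for illustration; the real one lives elsewhere in the inference codebase and is not shown in this diff):

import os

from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator


def str2bool(value) -> bool:
    # Simplified stand-in: only explicit "true"-like strings enable the flag.
    return str(value).strip().lower() in {"1", "true", "yes", "y"}


# inference/core/env.py: parse the flag once, defaulting to disabled.
ENABLE_PROMETHEUS = str2bool(os.getenv("ENABLE_PROMETHEUS", False))

# inference/core/interfaces/http/http_api.py: expose /metrics when the flag is set,
# instead of each docker config wiring up its own Instrumentator.
app = FastAPI()
if ENABLE_PROMETHEUS:
    Instrumentator().expose(app, endpoint="/metrics")
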
35 changes: 17 additions & 18 deletions docker/config/cpu_http.py
@@ -3,44 +3,43 @@
from inference.core.cache import cache
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.interfaces.stream_manager.manager_app.app import start
from inference.core.managers.active_learning import ActiveLearningManager, BackgroundTaskActiveLearningManager
from inference.core.managers.active_learning import (
    ActiveLearningManager,
    BackgroundTaskActiveLearningManager,
)
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
    RoboflowModelRegistry,
)
import os
from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA, ENABLE_STREAM_API
from inference.core.env import (
    MAX_ACTIVE_MODELS,
    ACTIVE_LEARNING_ENABLED,
    LAMBDA,
    ENABLE_STREAM_API,
)
from inference.models.utils import ROBOFLOW_MODEL_TYPES

model_registry = RoboflowModelRegistry(ROBOFLOW_MODEL_TYPES)

if ACTIVE_LEARNING_ENABLED:
    if LAMBDA:
        model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = ActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
    else:
        model_manager = BackgroundTaskActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = BackgroundTaskActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
else:
    model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
    model_manager,
    max_size=MAX_ACTIVE_MODELS
)
model_manager = WithFixedSizeCache(model_manager, max_size=MAX_ACTIVE_MODELS)
model_manager.init_pingback()
interface = HttpInterface(model_manager)
app = interface.app
# Setup Prometheus scraping endpoint at /metrics
# More info: https://github.com/trallnag/prometheus-fastapi-instrumentator
if os.environ.get("ENABLE_PROMETHEUS", False):
    instrumentor = Instrumentator()
    instrumentor.instrument(app).expose(app)

    @app.on_event("startup")
    async def _startup():
        instrumentor.expose(app)

if ENABLE_STREAM_API:
    stream_manager_process = Process(
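
For context on the removed block above: os.environ.get("ENABLE_PROMETHEUS", False) returns a string whenever the variable is set, so only the unset case is falsy, whereas the str2bool-based flag introduced in inference/core/env.py below parses the value. A minimal standalone illustration of that difference (not part of the diff):

import os

# With a raw os.environ.get check, any non-empty string is truthy --
# even an explicit "False" or "0" would satisfy the condition.
os.environ["ENABLE_PROMETHEUS"] = "False"
print(bool(os.environ.get("ENABLE_PROMETHEUS", False)))  # True

del os.environ["ENABLE_PROMETHEUS"]
print(bool(os.environ.get("ENABLE_PROMETHEUS", False)))  # False -- only the unset case is falsy
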
36 changes: 17 additions & 19 deletions docker/config/gpu_http.py
@@ -1,13 +1,18 @@
import os
from multiprocessing import Process

from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.cache import cache
from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA, ENABLE_STREAM_API
from inference.core.env import (
    MAX_ACTIVE_MODELS,
    ACTIVE_LEARNING_ENABLED,
    LAMBDA,
    ENABLE_STREAM_API,
)
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.interfaces.stream_manager.manager_app.app import start
from inference.core.managers.active_learning import ActiveLearningManager, BackgroundTaskActiveLearningManager
from inference.core.managers.active_learning import (
    ActiveLearningManager,
    BackgroundTaskActiveLearningManager,
)
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
@@ -20,30 +25,23 @@

if ACTIVE_LEARNING_ENABLED:
    if LAMBDA:
        model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = ActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
    else:
        model_manager = BackgroundTaskActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = BackgroundTaskActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
else:
    model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
    model_manager, max_size=MAX_ACTIVE_MODELS
)
model_manager = WithFixedSizeCache(model_manager, max_size=MAX_ACTIVE_MODELS)
model_manager.init_pingback()
interface = HttpInterface(
    model_manager,
)
app = interface.app

# Setup Prometheus scraping endpoint at /metrics
# More info: https://github.com/trallnag/prometheus-fastapi-instrumentator
if os.environ.get("ENABLE_PROMETHEUS", False):
    instrumentor = Instrumentator()
    instrumentor.instrument(app).expose(app)

    @app.on_event("startup")
    async def _startup():
        instrumentor.expose(app)

if ENABLE_STREAM_API:
    stream_manager_process = Process(
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu
@@ -74,5 +74,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.dev
@@ -74,5 +74,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT watchmedo auto-restart --directory=/app/inference --pattern=*.py --recursive -- uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.parallel
@@ -74,6 +74,7 @@ ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT redis-server --io-threads 3 --save --port $REDIS_PORT & \
celery -A inference.enterprise.parallel.tasks worker --prefetch-multiplier 2 --concurrency $NUM_CELERY_WORKERS -Q pre --loglevel=WARNING & \
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.slim
@@ -59,5 +59,6 @@ ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu
@@ -79,5 +79,6 @@ ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.dev
@@ -78,5 +78,6 @@ ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.parallel
@@ -63,5 +63,6 @@ ENV REDIS_HOST=localhost
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT python3 entrypoint.py
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.slim
@@ -55,5 +55,6 @@ ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.4.5.0
@@ -72,5 +72,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.4.6.1
@@ -87,5 +87,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1
@@ -83,5 +83,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.trt
@@ -56,5 +56,6 @@ ENV API_LOGGING_ENABLED=True
ENV PYTHONPATH=/app/:${PYTHONPATH}
ENV PATH=/opt/miniconda/bin:$PATH
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
2 changes: 2 additions & 0 deletions inference/core/env.py
@@ -169,6 +169,8 @@
# Flag to enable byte track, default is False
ENABLE_BYTE_TRACK = str2bool(os.getenv("ENABLE_BYTE_TRACK", False))

ENABLE_PROMETHEUS = str2bool(os.getenv("ENABLE_PROMETHEUS", False))

# Flag to enforce FPS, default is False
ENFORCE_FPS = str2bool(os.getenv("ENFORCE_FPS", False))
MAX_FPS = os.getenv("MAX_FPS")
5 changes: 5 additions & 0 deletions inference/core/interfaces/http/http_api.py
@@ -12,6 +12,7 @@
from fastapi.responses import JSONResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from fastapi_cprofile.profiler import CProfileMiddleware
from prometheus_fastapi_instrumentator import Instrumentator
from starlette.convertors import StringConvertor, register_url_convertor
from starlette.middleware.base import BaseHTTPMiddleware

@@ -110,6 +111,7 @@
    CORE_MODELS_ENABLED,
    DEDICATED_DEPLOYMENT_WORKSPACE_URL,
    DISABLE_WORKFLOW_ENDPOINTS,
    ENABLE_PROMETHEUS,
    ENABLE_STREAM_API,
    LAMBDA,
    LEGACY_ROUTE_ENABLED,
@@ -489,6 +491,9 @@ def __init__(
            root_path=root_path,
        )

        if ENABLE_PROMETHEUS:
            Instrumentator().expose(app, endpoint="/metrics")

        if METLO_KEY:
            app.add_middleware(
                ASGIMiddleware, host="https://app.metlo.com", api_key=METLO_KEY
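
With a server built from one of the images above running and ENABLE_PROMETHEUS=True, the new endpoint can be smoke-tested with a short script (this assumes the server listens on localhost:9001 as in the Dockerfiles and that the requests package is installed; adjust host and port for your deployment):

import requests

# Fetch the Prometheus exposition-format payload from the inference server.
response = requests.get("http://localhost:9001/metrics", timeout=5)
response.raise_for_status()

# Each metric family is preceded by "# HELP" and "# TYPE" comment lines.
print("\n".join(response.text.splitlines()[:10]))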
