
Commit

Merge pull request #712 from roboflow/fix-prometheus
Fix prometheus scraping
PawelPeczek-Roboflow authored Oct 3, 2024
2 parents 7e27548 + 74ccab7 commit 9b65794
Showing 16 changed files with 53 additions and 37 deletions.
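
In short, this commit moves the Prometheus wiring out of the individual docker config scripts and into the shared HTTP API: the flag is parsed once in inference/core/env.py and the /metrics endpoint is exposed from http_api.py. A condensed sketch of the resulting flow, pieced together from the hunks below (the str2bool helper is simplified here for illustration; the real one lives elsewhere in the inference codebase and is not shown in this diff):

import os

from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator


def str2bool(value) -> bool:
    # Simplified stand-in: only explicit "true"-like strings enable the flag.
    return str(value).strip().lower() in {"1", "true", "yes", "y"}


# inference/core/env.py: parse the flag once, defaulting to disabled.
ENABLE_PROMETHEUS = str2bool(os.getenv("ENABLE_PROMETHEUS", False))

# inference/core/interfaces/http/http_api.py: expose /metrics when the flag is set,
# instead of each docker config wiring up its own Instrumentator.
app = FastAPI()
if ENABLE_PROMETHEUS:
    Instrumentator().expose(app, endpoint="/metrics")
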
35 changes: 17 additions & 18 deletions docker/config/cpu_http.py
@@ -3,44 +3,43 @@
from inference.core.cache import cache
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.interfaces.stream_manager.manager_app.app import start
from inference.core.managers.active_learning import ActiveLearningManager, BackgroundTaskActiveLearningManager
from inference.core.managers.active_learning import (
    ActiveLearningManager,
    BackgroundTaskActiveLearningManager,
)
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
    RoboflowModelRegistry,
)
import os
from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA, ENABLE_STREAM_API
from inference.core.env import (
    MAX_ACTIVE_MODELS,
    ACTIVE_LEARNING_ENABLED,
    LAMBDA,
    ENABLE_STREAM_API,
)
from inference.models.utils import ROBOFLOW_MODEL_TYPES

model_registry = RoboflowModelRegistry(ROBOFLOW_MODEL_TYPES)

if ACTIVE_LEARNING_ENABLED:
    if LAMBDA:
        model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = ActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
    else:
        model_manager = BackgroundTaskActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = BackgroundTaskActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
else:
    model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
    model_manager,
    max_size=MAX_ACTIVE_MODELS
)
model_manager = WithFixedSizeCache(model_manager, max_size=MAX_ACTIVE_MODELS)
model_manager.init_pingback()
interface = HttpInterface(model_manager)
app = interface.app
# Setup Prometheus scraping endpoint at /metrics
# More info: https://github.com/trallnag/prometheus-fastapi-instrumentator
if os.environ.get("ENABLE_PROMETHEUS", False):
    instrumentor = Instrumentator()
    instrumentor.instrument(app).expose(app)

    @app.on_event("startup")
    async def _startup():
        instrumentor.expose(app)

if ENABLE_STREAM_API:
    stream_manager_process = Process(
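
For context on the removed block above: os.environ.get("ENABLE_PROMETHEUS", False) returns a string whenever the variable is set, so only the unset case is falsy, whereas the str2bool-based flag introduced in inference/core/env.py below parses the value. A minimal standalone illustration of that difference (not part of the diff):

import os

# With a raw os.environ.get check, any non-empty string is truthy --
# even an explicit "False" or "0" would satisfy the condition.
os.environ["ENABLE_PROMETHEUS"] = "False"
print(bool(os.environ.get("ENABLE_PROMETHEUS", False)))  # True

del os.environ["ENABLE_PROMETHEUS"]
print(bool(os.environ.get("ENABLE_PROMETHEUS", False)))  # False -- only the unset case is falsy
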
36 changes: 17 additions & 19 deletions docker/config/gpu_http.py
@@ -1,13 +1,18 @@
import os
from multiprocessing import Process

from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.cache import cache
from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA, ENABLE_STREAM_API
from inference.core.env import (
    MAX_ACTIVE_MODELS,
    ACTIVE_LEARNING_ENABLED,
    LAMBDA,
    ENABLE_STREAM_API,
)
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.interfaces.stream_manager.manager_app.app import start
from inference.core.managers.active_learning import ActiveLearningManager, BackgroundTaskActiveLearningManager
from inference.core.managers.active_learning import (
    ActiveLearningManager,
    BackgroundTaskActiveLearningManager,
)
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
@@ -20,30 +25,23 @@

if ACTIVE_LEARNING_ENABLED:
    if LAMBDA:
        model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = ActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
    else:
        model_manager = BackgroundTaskActiveLearningManager(model_registry=model_registry, cache=cache)
        model_manager = BackgroundTaskActiveLearningManager(
            model_registry=model_registry, cache=cache
        )
else:
    model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
    model_manager, max_size=MAX_ACTIVE_MODELS
)
model_manager = WithFixedSizeCache(model_manager, max_size=MAX_ACTIVE_MODELS)
model_manager.init_pingback()
interface = HttpInterface(
    model_manager,
)
app = interface.app

# Setup Prometheus scraping endpoint at /metrics
# More info: https://github.com/trallnag/prometheus-fastapi-instrumentator
if os.environ.get("ENABLE_PROMETHEUS", False):
    instrumentor = Instrumentator()
    instrumentor.instrument(app).expose(app)

    @app.on_event("startup")
    async def _startup():
        instrumentor.expose(app)

if ENABLE_STREAM_API:
    stream_manager_process = Process(
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu
@@ -74,5 +74,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.dev
@@ -74,5 +74,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT watchmedo auto-restart --directory=/app/inference --pattern=*.py --recursive -- uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.parallel
@@ -74,6 +74,7 @@ ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT redis-server --io-threads 3 --save --port $REDIS_PORT & \
celery -A inference.enterprise.parallel.tasks worker --prefetch-multiplier 2 --concurrency $NUM_CELERY_WORKERS -Q pre --loglevel=WARNING & \
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.cpu.slim
@@ -59,5 +59,6 @@ ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu
@@ -79,5 +79,6 @@ ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.dev
@@ -78,5 +78,6 @@ ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.parallel
@@ -63,5 +63,6 @@ ENV REDIS_HOST=localhost
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT python3 entrypoint.py
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.slim
@@ -55,5 +55,6 @@ ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.4.5.0
@@ -72,5 +72,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.4.6.1
@@ -87,5 +87,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1
@@ -83,5 +83,6 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_TROCR_ENABLED=false
ENV RUNS_ON_JETSON=True
ENV ENABLE_STREAM_API=True
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
1 change: 1 addition & 0 deletions docker/dockerfiles/Dockerfile.onnx.trt
@@ -56,5 +56,6 @@ ENV API_LOGGING_ENABLED=True
ENV PYTHONPATH=/app/:${PYTHONPATH}
ENV PATH=/opt/miniconda/bin:$PATH
ENV CORE_MODEL_TROCR_ENABLED=false
ENV ENABLE_PROMETHEUS=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
2 changes: 2 additions & 0 deletions inference/core/env.py
@@ -169,6 +169,8 @@
# Flag to enable byte track, default is False
ENABLE_BYTE_TRACK = str2bool(os.getenv("ENABLE_BYTE_TRACK", False))

ENABLE_PROMETHEUS = str2bool(os.getenv("ENABLE_PROMETHEUS", False))

# Flag to enforce FPS, default is False
ENFORCE_FPS = str2bool(os.getenv("ENFORCE_FPS", False))
MAX_FPS = os.getenv("MAX_FPS")
5 changes: 5 additions & 0 deletions inference/core/interfaces/http/http_api.py
@@ -12,6 +12,7 @@
from fastapi.responses import JSONResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from fastapi_cprofile.profiler import CProfileMiddleware
from prometheus_fastapi_instrumentator import Instrumentator
from starlette.convertors import StringConvertor, register_url_convertor
from starlette.middleware.base import BaseHTTPMiddleware

@@ -110,6 +111,7 @@
    CORE_MODELS_ENABLED,
    DEDICATED_DEPLOYMENT_WORKSPACE_URL,
    DISABLE_WORKFLOW_ENDPOINTS,
    ENABLE_PROMETHEUS,
    ENABLE_STREAM_API,
    LAMBDA,
    LEGACY_ROUTE_ENABLED,
@@ -489,6 +491,9 @@ def __init__(
            root_path=root_path,
        )

        if ENABLE_PROMETHEUS:
            Instrumentator().expose(app, endpoint="/metrics")

        if METLO_KEY:
            app.add_middleware(
                ASGIMiddleware, host="https://app.metlo.com", api_key=METLO_KEY
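
With a server built from one of the images above running and ENABLE_PROMETHEUS=True, the new endpoint can be smoke-tested with a short script (this assumes the server listens on localhost:9001 as in the Dockerfiles and that the requests package is installed; adjust host and port for your deployment):

import requests

# Fetch the Prometheus exposition-format payload from the inference server.
response = requests.get("http://localhost:9001/metrics", timeout=5)
response.raise_for_status()

# Each metric family is preceded by "# HELP" and "# TYPE" comment lines.
print("\n".join(response.text.splitlines()[:10]))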
