Merge pull request #721 from roboflow/revert-717-additional-prometheus-stats

Revert "Add Prometheus CPU/GPU custom metrics and latency"
PawelPeczek-Roboflow authored Oct 4, 2024
2 parents dbc5c2a + 30afc56 commit 5629ed6
Showing 2 changed files with 1 addition and 69 deletions.
15 changes: 1 addition & 14 deletions inference/core/interfaces/http/http_api.py
@@ -13,7 +13,6 @@
 from fastapi.staticfiles import StaticFiles
 from fastapi_cprofile.profiler import CProfileMiddleware
 from prometheus_fastapi_instrumentator import Instrumentator
-from prometheus_fastapi_instrumentator import metrics as prom_metrics
 from starlette.convertors import StringConvertor, register_url_convertor
 from starlette.middleware.base import BaseHTTPMiddleware

@@ -184,10 +183,6 @@
     MessageToBigError,
 )
 from inference.core.managers.base import ModelManager
-from inference.core.managers.metrics import (
-    prom_cpu_utilization_total,
-    prom_gpu_utilization_total,
-)
 from inference.core.roboflow_api import (
     get_roboflow_dataset_type,
     get_roboflow_workspace,
@@ -497,15 +492,7 @@ def __init__(
         )
 
         if ENABLE_PROMETHEUS:
-            instrumentator = Instrumentator().instrument(app)
-            instrumentator.add(prom_cpu_utilization_total())
-            instrumentator.add(prom_gpu_utilization_total())
-            instrumentator.add(
-                prom_metrics.latency(
-                    buckets=(1,),
-                )
-            )
-            instrumentator.expose(app, endpoint="/metrics")
+            Instrumentator().expose(app, endpoint="/metrics")
 
         if METLO_KEY:
             app.add_middleware(
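For context, the reverted block registered custom instrumentation callables with prometheus_fastapi_instrumentator before exposing the /metrics endpoint. A minimal sketch of that registration pattern, assuming a bare FastAPI app (the request_body_size metric and the bucket values are illustrative, not part of this repository):

from typing import Callable

from fastapi import FastAPI
from prometheus_client import Gauge
from prometheus_fastapi_instrumentator import Instrumentator
from prometheus_fastapi_instrumentator import metrics as prom_metrics
from prometheus_fastapi_instrumentator.metrics import Info

app = FastAPI()  # stand-in for the inference server app


def request_body_size() -> Callable[[Info], None]:
    # Same shape as the reverted prom_cpu_utilization_total(): create the
    # collector once, then return a callable that the instrumentator
    # invokes with an Info object after every handled request.
    gauge = Gauge("last_request_content_length_bytes", "Size of the last request body")

    def instrumentation(info: Info) -> None:
        length = info.request.headers.get("content-length")
        if length is not None:
            gauge.set(int(length))

    return instrumentation


instrumentator = Instrumentator().instrument(app)
instrumentator.add(request_body_size())
instrumentator.add(prom_metrics.latency(buckets=(0.1, 0.5, 1.0)))  # built-in latency histogram
instrumentator.expose(app, endpoint="/metrics")

After the revert, only Instrumentator().expose(app, endpoint="/metrics") remains, which serves the metric registry without adding the per-request callables.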
55 changes: 0 additions & 55 deletions inference/core/managers/metrics.py
@@ -1,23 +1,13 @@
-import os
 import platform
 import re
 import socket
 import time
 import uuid
-from typing import Callable

 import GPUtil
-from prometheus_client import REGISTRY, Gauge
-from prometheus_fastapi_instrumentator.metrics import Info

 from inference.core.cache import cache
 from inference.core.logger import logger
 from inference.core.version import __version__

-previous_cpu_total = None
-previous_time = None
-NUM_CPU_CORES = os.cpu_count()


 def get_model_metrics(
     inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
@@ -109,48 +99,3 @@ def get_inference_results_for_model(
         inference_results.append({"request_time": score, "inference": result})

     return inference_results
-
-
-def prom_cpu_utilization_total() -> Callable[[Info], None]:
-    cpu_utilization_gauge = Gauge(
-        "process_cpu_utilization_total", "Total CPU utilization"
-    )
-
-    def instrumentation(info: Info) -> None:
-        global previous_cpu_total, previous_time
-        cpu_metric = REGISTRY.get_sample_value("process_cpu_seconds_total")
-        if cpu_metric is None:
-            return
-        current_time = time.time()
-        if previous_cpu_total is None:
-            previous_time = current_time
-            previous_cpu_total = cpu_metric
-        else:
-            cpu_delta = cpu_metric - previous_cpu_total
-            time_delta = current_time - previous_time
-            if time_delta > 0:
-                cpu_utilization_percent = 100 * (cpu_delta / time_delta) / NUM_CPU_CORES
-                cpu_utilization_gauge.set(cpu_utilization_percent)
-            previous_cpu_total = cpu_metric
-            previous_time = current_time
-
-    return instrumentation
-
-
-def prom_gpu_utilization_total() -> Callable[[Info], None]:
-    gpu_load_gauge = Gauge("gpu_load_percentage", "GPU Load", ["gpu_id"])
-    gpu_memory_gauge = Gauge(
-        "gpu_memory_utilization", "GPU Memory Utilization", ["gpu_id"]
-    )
-    gpu_temp_gauge = Gauge("gpu_temperature_celsius", "GPU Temperature", ["gpu_id"])
-
-    def instrumentation(info: Info) -> None:
-        gpus = GPUtil.getGPUs()
-        if not gpus:
-            return
-        for gpu in gpus:
-            gpu_load_gauge.labels(gpu_id=gpu.id).set(gpu.load * 100)
-            gpu_memory_gauge.labels(gpu_id=gpu.id).set(gpu.memoryUtil * 100)
-            gpu_temp_gauge.labels(gpu_id=gpu.id).set(gpu.temperature)
-
-    return instrumentation
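The removed prom_cpu_utilization_total() derived utilization from successive samples of process_cpu_seconds_total: the delta in CPU seconds divided by the delta in wall-clock seconds, normalized by core count. A minimal standalone sketch of the same calculation, assuming prometheus_client's default process collector (which populates process_cpu_seconds_total on Linux) and an illustrative one-second sampling interval:

import os
import time

from prometheus_client import REGISTRY

NUM_CPU_CORES = os.cpu_count() or 1  # guard against cpu_count() returning None

# Two samples of cumulative process CPU time, taken one second apart.
cpu_before = REGISTRY.get_sample_value("process_cpu_seconds_total")
time_before = time.time()
time.sleep(1.0)  # illustrative interval; the reverted code sampled on each request
cpu_after = REGISTRY.get_sample_value("process_cpu_seconds_total")
time_after = time.time()

# get_sample_value() returns None where the process collector is unavailable.
if cpu_before is not None and cpu_after is not None:
    # Fraction of total available CPU time consumed, scaled to a percentage.
    utilization = 100 * (cpu_after - cpu_before) / ((time_after - time_before) * NUM_CPU_CORES)
    print(f"process CPU utilization: {utilization:.2f}%")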
