| 
 | 1 | +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.  | 
 | 2 | +# SPDX-License-Identifier: Apache-2.0  | 
 | 3 | + | 
 | 4 | +"""  | 
 | 5 | +Prometheus metrics utilities for Dynamo components.  | 
 | 6 | +
  | 
 | 7 | +This module provides shared functionality for collecting and exposing Prometheus metrics  | 
 | 8 | +from backend engines (SGLang, vLLM, etc.) via Dynamo's metrics endpoint.  | 
 | 9 | +
  | 
 | 10 | +Note: Engine metrics take time to appear after engine initialization,  | 
 | 11 | +while Dynamo runtime metrics are available immediately after component creation.  | 
 | 12 | +"""  | 
 | 13 | + | 
 | 14 | +import logging  | 
 | 15 | +import re  | 
 | 16 | +from typing import TYPE_CHECKING, Optional  | 
 | 17 | + | 
 | 18 | +from prometheus_client import generate_latest  | 
 | 19 | + | 
 | 20 | +from dynamo._core import Endpoint  | 
 | 21 | + | 
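# Import CollectorRegistry only for type hints to avoid importing prometheus_client at module load time.
# prometheus_client must be imported AFTER set_prometheus_multiproc_dir() is called.
# See main.py worker() function for detailed explanation.
# NOTE(review): the module-level `from prometheus_client import generate_latest` above
# already imports prometheus_client when this module is loaded, so the TYPE_CHECKING
# guard alone does not defer that import. Confirm whether the generate_latest import
# should be moved inside get_prometheus_expfmt().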
 | 25 | +if TYPE_CHECKING:  | 
 | 26 | +    from prometheus_client import CollectorRegistry  | 
 | 27 | + | 
 | 28 | + | 
 | 29 | +def register_engine_metrics_callback(  | 
 | 30 | +    endpoint: Endpoint,  | 
 | 31 | +    registry: "CollectorRegistry",  | 
 | 32 | +    metric_prefix: str,  | 
 | 33 | +    engine_name: str,  | 
 | 34 | +) -> None:  | 
 | 35 | +    """  | 
 | 36 | +    Register a callback to expose engine Prometheus metrics via Dynamo's metrics endpoint.  | 
 | 37 | +
  | 
 | 38 | +    This registers a callback that is invoked when /metrics is scraped, passing through  | 
 | 39 | +    engine-specific metrics alongside Dynamo runtime metrics.  | 
 | 40 | +
  | 
 | 41 | +    Args:  | 
 | 42 | +        endpoint: Dynamo endpoint object with metrics.register_prometheus_expfmt_callback()  | 
 | 43 | +        registry: Prometheus registry to collect from (e.g., REGISTRY or CollectorRegistry)  | 
 | 44 | +        metric_prefix: Prefix to filter metrics (e.g., "vllm:" or "sglang:")  | 
 | 45 | +        engine_name: Name of the engine for logging (e.g., "vLLM" or "SGLang")  | 
 | 46 | +
  | 
 | 47 | +    Example:  | 
 | 48 | +        from prometheus_client import REGISTRY  | 
 | 49 | +        register_engine_metrics_callback(  | 
 | 50 | +            generate_endpoint, REGISTRY, "vllm:", "vLLM"  | 
 | 51 | +        )  | 
 | 52 | +    """  | 
 | 53 | + | 
 | 54 | +    def get_expfmt() -> str:  | 
 | 55 | +        """Callback to return engine Prometheus metrics in exposition format"""  | 
 | 56 | +        return get_prometheus_expfmt(registry, metric_prefix_filter=metric_prefix)  | 
 | 57 | + | 
 | 58 | +    endpoint.metrics.register_prometheus_expfmt_callback(get_expfmt)  | 
 | 59 | + | 
 | 60 | + | 
 | 61 | +def get_prometheus_expfmt(  | 
 | 62 | +    registry,  | 
 | 63 | +    metric_prefix_filter: Optional[str] = None,  | 
 | 64 | +) -> str:  | 
 | 65 | +    """  | 
 | 66 | +    Get Prometheus metrics from a registry formatted as text using the standard text encoder.  | 
 | 67 | +
  | 
 | 68 | +    Collects all metrics from the registry and returns them in Prometheus text exposition format.  | 
 | 69 | +    Optionally filters metrics by prefix.  | 
 | 70 | +
  | 
 | 71 | +    Prometheus exposition format consists of:  | 
 | 72 | +    - Comment lines starting with # (HELP and TYPE declarations)  | 
 | 73 | +    - Metric lines with format: metric_name{label="value"} metric_value timestamp  | 
 | 74 | +
  | 
 | 75 | +    Example output format:  | 
 | 76 | +        # HELP vllm:request_success_total Number of successful requests  | 
 | 77 | +        # TYPE vllm:request_success_total counter  | 
 | 78 | +        vllm:request_success_total{model="llama2",endpoint="generate"} 150.0  | 
 | 79 | +        # HELP vllm:time_to_first_token_seconds Time to first token  | 
 | 80 | +        # TYPE vllm:time_to_first_token_seconds histogram  | 
 | 81 | +        vllm:time_to_first_token_seconds_bucket{model="llama2",le="0.01"} 10.0  | 
 | 82 | +        vllm:time_to_first_token_seconds_bucket{model="llama2",le="0.1"} 45.0  | 
 | 83 | +        vllm:time_to_first_token_seconds_count{model="llama2"} 50.0  | 
 | 84 | +        vllm:time_to_first_token_seconds_sum{model="llama2"} 2.5  | 
 | 85 | +
  | 
 | 86 | +    Args:  | 
 | 87 | +        registry: Prometheus registry to collect from.  | 
 | 88 | +                 Pass CollectorRegistry with MultiProcessCollector for SGLang.  | 
 | 89 | +                 Pass REGISTRY for vLLM single-process mode.  | 
 | 90 | +        metric_prefix_filter: Optional prefix to filter displayed metrics (e.g., "vllm:").  | 
 | 91 | +                             If None, returns all metrics. (default: None)  | 
 | 92 | +
  | 
 | 93 | +    Returns:  | 
 | 94 | +        Formatted metrics text in Prometheus exposition format. Returns empty string on error.  | 
 | 95 | +
  | 
 | 96 | +    Example:  | 
 | 97 | +        from prometheus_client import REGISTRY  | 
 | 98 | +        metrics_text = get_prometheus_expfmt(REGISTRY)  | 
 | 99 | +        print(metrics_text)  | 
 | 100 | +
  | 
 | 101 | +        # With filter  | 
 | 102 | +        vllm_metrics = get_prometheus_expfmt(REGISTRY, metric_prefix_filter="vllm:")  | 
 | 103 | +    """  | 
 | 104 | +    try:  | 
 | 105 | +        # Generate metrics in Prometheus text format  | 
 | 106 | +        metrics_text = generate_latest(registry).decode("utf-8")  | 
 | 107 | + | 
 | 108 | +        if metric_prefix_filter:  | 
 | 109 | +            # Filter lines: keep metric lines starting with prefix and their HELP/TYPE comments  | 
 | 110 | +            escaped_prefix = re.escape(metric_prefix_filter)  | 
 | 111 | +            pattern = rf"^(?:{escaped_prefix}|# (?:HELP|TYPE) {escaped_prefix})"  | 
 | 112 | +            filtered_lines = [  | 
 | 113 | +                line for line in metrics_text.split("\n") if re.match(pattern, line)  | 
 | 114 | +            ]  | 
 | 115 | +            result = "\n".join(filtered_lines)  | 
 | 116 | +            if result:  | 
 | 117 | +                # Ensure result ends with newline  | 
 | 118 | +                if result and not result.endswith("\n"):  | 
 | 119 | +                    result += "\n"  | 
 | 120 | +            return result  | 
 | 121 | +        else:  | 
 | 122 | +            # Ensure metrics_text ends with newline  | 
 | 123 | +            if metrics_text and not metrics_text.endswith("\n"):  | 
 | 124 | +                metrics_text += "\n"  | 
 | 125 | +            return metrics_text  | 
 | 126 | + | 
 | 127 | +    except Exception as e:  | 
 | 128 | +        logging.error(f"Error getting metrics: {e}")  | 
 | 129 | +        return ""  | 
0 commit comments