Skip to content

Commit 937f686

Browse files
committed
feat: make model metrics endpoints configurable
1 parent 7ca36bf commit 937f686

File tree

4 files changed

+22
-9
lines changed

4 files changed

+22
-9
lines changed

cmd/epp/runner/runner.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ var (
110110
"vllm:lora_requests_info",
111111
"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
112112

113+
modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods")
114+
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
115+
113116
setupLog = ctrl.Log.WithName("setup")
114117

115118
// Environment variables
@@ -183,7 +186,11 @@ func (r *Runner) Run(ctx context.Context) error {
183186
return err
184187
}
185188
verifyMetricMapping(*mapping, setupLog)
186-
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
189+
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
190+
MetricMapping: mapping,
191+
ModelServerMetricsPort: int32(*modelServerMetricsPort),
192+
ModelServerMetricsPath: *modelServerMetricsPath,
193+
}, *refreshMetricsInterval)
187194

188195
datastore := datastore.NewDatastore(ctx, pmf)
189196

pkg/epp/backend/metrics/fake.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ type FakePodMetricsClient struct {
5656
Res map[types.NamespacedName]*MetricsState
5757
}
5858

59-
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
59+
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, _ int32) (*MetricsState, error) {
6060
f.errMu.RLock()
6161
err, ok := f.Err[pod.NamespacedName]
6262
f.errMu.RUnlock()

pkg/epp/backend/metrics/metrics.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,14 @@ const (
3737
)
3838

3939
type PodMetricsClientImpl struct {
40-
MetricMapping *MetricMapping
40+
MetricMapping *MetricMapping
41+
ModelServerMetricsPort int32
42+
ModelServerMetricsPath string
4143
}
4244

4345
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
4446
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
45-
// Currently the metrics endpoint is hard-coded, which works with vLLM.
46-
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
47-
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"
48-
47+
url := p.getMetricEndpoint(pod, port)
4948
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
5049
if err != nil {
5150
return nil, fmt.Errorf("failed to create request: %v", err)
@@ -70,6 +69,13 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
7069
return p.promToPodMetrics(metricFamilies, existing)
7170
}
7271

72+
func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNumber int32) string {
73+
if p.ModelServerMetricsPort == 0 {
74+
p.ModelServerMetricsPort = targetPortNumber
75+
}
76+
return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
77+
}
78+
7379
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
7480
func (p *PodMetricsClientImpl) promToPodMetrics(
7581
metricFamilies map[string]*dto.MetricFamily,

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,9 +495,9 @@ func TestFetchMetrics(t *testing.T) {
495495
},
496496
}
497497
existing := &MetricsState{}
498-
p := &PodMetricsClientImpl{} // No MetricMapping needed for this basic test
498+
p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
499499

500-
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use.
500+
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use
501501
if err == nil {
502502
t.Errorf("FetchMetrics() expected error, got nil")
503503
}

0 commit comments

Comments
 (0)