Skip to content

Commit 7466a28

Browse files
committed
feat: make model metrics endpoints configurable
1 parent 7ca36bf commit 7466a28

File tree

5 files changed

+32
-12
lines changed

5 files changed

+32
-12
lines changed

cmd/epp/runner/runner.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ var (
110110
"vllm:lora_requests_info",
111111
"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
112112

113+
modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods")
114+
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
115+
113116
setupLog = ctrl.Log.WithName("setup")
114117

115118
// Environment variables
@@ -183,7 +186,11 @@ func (r *Runner) Run(ctx context.Context) error {
183186
return err
184187
}
185188
verifyMetricMapping(*mapping, setupLog)
186-
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
189+
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
190+
MetricMapping: mapping,
191+
ModelServerMetricsPort: int32(*modelServerMetricsPort),
192+
ModelServerMetricsPath: *modelServerMetricsPath,
193+
}, *refreshMetricsInterval)
187194

188195
datastore := datastore.NewDatastore(ctx, pmf)
189196

pkg/epp/backend/metrics/fake.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ type FakePodMetricsClient struct {
5656
Res map[types.NamespacedName]*MetricsState
5757
}
5858

59-
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
59+
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, url string) (*MetricsState, error) {
6060
f.errMu.RLock()
6161
err, ok := f.Err[pod.NamespacedName]
6262
f.errMu.RUnlock()
@@ -73,6 +73,10 @@ func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Po
7373
return res.Clone(), nil
7474
}
7575

76+
func (f *FakePodMetricsClient) GetMetricEndpoint(_ *backend.Pod, _ int32) string {
77+
return "fake-metric-endpoint"
78+
}
79+
7680
func (f *FakePodMetricsClient) SetRes(new map[types.NamespacedName]*MetricsState) {
7781
f.resMu.Lock()
7882
defer f.resMu.Unlock()

pkg/epp/backend/metrics/metrics.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,13 @@ const (
3737
)
3838

3939
type PodMetricsClientImpl struct {
40-
MetricMapping *MetricMapping
40+
MetricMapping *MetricMapping
41+
ModelServerMetricsPort int32
42+
ModelServerMetricsPath string
4143
}
4244

4345
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
44-
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
45-
// Currently the metrics endpoint is hard-coded, which works with vLLM.
46-
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
47-
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"
48-
46+
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, url string) (*MetricsState, error) {
4947
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
5048
if err != nil {
5149
return nil, fmt.Errorf("failed to create request: %v", err)
@@ -70,6 +68,13 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
7068
return p.promToPodMetrics(metricFamilies, existing)
7169
}
7270

71+
func (p *PodMetricsClientImpl) GetMetricEndpoint(pod *backend.Pod, targetPortNumber int32) string {
72+
if p.ModelServerMetricsPort == 0 {
73+
p.ModelServerMetricsPort = targetPortNumber
74+
}
75+
return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
76+
}
77+
7378
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
7479
func (p *PodMetricsClientImpl) promToPodMetrics(
7580
metricFamilies map[string]*dto.MetricFamily,

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,9 +495,10 @@ func TestFetchMetrics(t *testing.T) {
495495
},
496496
}
497497
existing := &MetricsState{}
498-
p := &PodMetricsClientImpl{} // No MetricMapping needed for this basic test
498+
p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
499499

500-
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use.
500+
url := p.GetMetricEndpoint(pod, 9999) // Use a port that's unlikely to be in use
501+
_, err := p.FetchMetrics(ctx, pod, existing, url)
501502
if err == nil {
502503
t.Errorf("FetchMetrics() expected error, got nil")
503504
}

pkg/epp/backend/metrics/pod_metrics.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ type podMetrics struct {
5050
}
5151

5252
type PodMetricsClient interface {
53-
FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error)
53+
FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, url string) (*MetricsState, error)
54+
GetMetricEndpoint(pod *backend.Pod, poolTargetPortNumber int32) string
5455
}
5556

5657
func (pm *podMetrics) String() string {
@@ -116,9 +117,11 @@ func (pm *podMetrics) refreshMetrics() error {
116117
}
117118
ctx, cancel := context.WithTimeout(context.Background(), fetchMetricsTimeout)
118119
defer cancel()
119-
updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber)
120+
url := pm.pmc.GetMetricEndpoint(pm.GetPod(), pool.Spec.TargetPortNumber)
121+
updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), url)
120122
if err != nil {
121123
pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err)
124+
return err
122125
}
123126
// Optimistically update metrics even if there was an error.
124127
// The FetchMetrics can return an error for the following reasons:

0 commit comments

Comments
 (0)