Skip to content

Commit 7a013f5

Browse files
committed
- implemented kvcache-aware-scorer
- modified flow to support feeding prompt - implemented max-score-picker - minor refactoring - modified makefile and fixed rebase Signed-off-by: Maroon Ayoub <[email protected]>
1 parent a80bcfc commit 7a013f5

24 files changed

+453
-225
lines changed

Dockerfile

+21-2
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,47 @@ FROM quay.io/projectquay/golang:1.24 AS builder
33
ARG TARGETOS
44
ARG TARGETARCH
55

6-
# ENV GOPROXY=https://goproxy.io,direct
6+
# Install build tools
7+
RUN dnf install -y gcc-c++ libstdc++ libstdc++-devel && dnf clean all
78

89
WORKDIR /workspace
10+
11+
## NeuralMagic internal repos pull config
12+
ARG GIT_NM_USER
13+
ARG NM_TOKEN
14+
### use git token
15+
RUN echo -e "machine github.com\n\tlogin ${GIT_NM_USER}\n\tpassword ${NM_TOKEN}" >> ~/.netrc
16+
ENV GOPRIVATE=github.com/neuralmagic
17+
ENV GIT_TERMINAL_PROMPT=1
18+
919
# Copy the Go Modules manifests
1020
COPY go.mod go.mod
1121
COPY go.sum go.sum
1222
# cache deps before building and copying source so that we don't need to re-download as much
1323
# and so that source changes don't invalidate our downloaded layer
1424
RUN go mod download
25+
RUN rm -rf ~/.netrc # remove git token
1526

1627
# Copy the go source
1728
COPY cmd ./cmd
1829
COPY pkg ./pkg
1930
COPY internal ./internal
2031
COPY api ./api
2132

33+
# HuggingFace tokenizer bindings
34+
RUN mkdir -p lib
35+
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
36+
RUN ranlib lib/*.a
37+
2238
# Build
2339
# GOARCH has no default value, which allows the binary to be built according to the host where the command
2440
# was called. For example, if we call make image-build in a local env running on an Apple Silicon M1 machine,
2541
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
2642
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
27-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o bin/epp cmd/epp/main.go cmd/epp/health.go
43+
ENV CGO_ENABLED=1
44+
ENV GOOS=${TARGETOS:-linux}
45+
ENV GOARCH=${TARGETARCH}
46+
RUN go build -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib'" cmd/epp/main.go cmd/epp/health.go
2847

2948
# Use distroless as minimal base image to package the manager binary
3049
# Refer to https://github.com/GoogleContainerTools/distroless for more details

Makefile

+145-165
Large diffs are not rendered by default.

cmd/epp/main.go

-1
Original file line numberDiff line numberDiff line change
@@ -314,5 +314,4 @@ func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logge
314314
if mapping.LoraRequestInfo == nil {
315315
logger.Info("Not scraping metric: LoraRequestInfo")
316316
}
317-
318317
}

go.mod

+14-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
module sigs.k8s.io/gateway-api-inference-extension
22

3-
go 1.24.0
3+
go 1.24.1
4+
5+
toolchain go1.24.2
46

57
require (
68
github.com/elastic/crd-ref-docs v0.1.0
79
github.com/envoyproxy/go-control-plane/envoy v1.32.4
810
github.com/go-logr/logr v1.4.2
911
github.com/google/go-cmp v0.7.0
12+
github.com/google/uuid v1.6.0
13+
github.com/neuralmagic/kvcache-manager v0.0.0-20250422070607-db465f8aaa71
1014
github.com/onsi/ginkgo/v2 v2.23.4
1115
github.com/onsi/gomega v1.37.0
1216
github.com/prometheus/client_golang v1.22.0
@@ -41,7 +45,9 @@ require (
4145
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4246
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4347
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
48+
github.com/daulet/tokenizers v1.20.2 // indirect
4449
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
50+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
4551
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
4652
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
4753
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
@@ -66,9 +72,9 @@ require (
6672
github.com/google/gnostic-models v0.6.8 // indirect
6773
github.com/google/gofuzz v1.2.0 // indirect
6874
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
69-
github.com/google/uuid v1.6.0 // indirect
7075
github.com/gorilla/websocket v1.5.0 // indirect
7176
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
77+
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
7278
github.com/huandu/xstrings v1.3.3 // indirect
7379
github.com/imdario/mergo v0.3.11 // indirect
7480
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@@ -90,6 +96,7 @@ require (
9096
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
9197
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
9298
github.com/prometheus/procfs v0.15.1 // indirect
99+
github.com/redis/go-redis/v9 v9.7.3 // indirect
93100
github.com/spf13/cobra v1.8.1 // indirect
94101
github.com/spf13/pflag v1.0.5 // indirect
95102
github.com/stoewer/go-strcase v1.3.0 // indirect
@@ -104,15 +111,15 @@ require (
104111
go.opentelemetry.io/otel/trace v1.34.0 // indirect
105112
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
106113
go.uber.org/automaxprocs v1.6.0 // indirect
107-
golang.org/x/crypto v0.36.0 // indirect
114+
golang.org/x/crypto v0.37.0 // indirect
108115
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
109116
golang.org/x/mod v0.24.0 // indirect
110-
golang.org/x/net v0.38.0 // indirect
117+
golang.org/x/net v0.39.0 // indirect
111118
golang.org/x/oauth2 v0.27.0 // indirect
112-
golang.org/x/sync v0.12.0 // indirect
119+
golang.org/x/sync v0.13.0 // indirect
113120
golang.org/x/sys v0.32.0 // indirect
114-
golang.org/x/term v0.30.0 // indirect
115-
golang.org/x/text v0.23.0 // indirect
121+
golang.org/x/term v0.31.0 // indirect
122+
golang.org/x/text v0.24.0 // indirect
116123
golang.org/x/time v0.7.0 // indirect
117124
golang.org/x/tools v0.31.0 // indirect
118125
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect

go.sum

+24-12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
1616
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
1717
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
1818
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
19+
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
20+
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
21+
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
22+
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
1923
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
2024
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
2125
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
@@ -24,10 +28,14 @@ github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 h1:boJj011Hh+874zpIySe
2428
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
2529
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
2630
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
31+
github.com/daulet/tokenizers v1.20.2 h1:tlq/vIOiBTKDPets3596aFvmJYLn3XI6LFKq4q9LKhQ=
32+
github.com/daulet/tokenizers v1.20.2/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
2733
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2834
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2935
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
3036
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
37+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
38+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
3139
github.com/elastic/crd-ref-docs v0.1.0 h1:Cr5kz89QB3Iuuj7dhAfLMApCrChEGAaIBTxGk/xuRKw=
3240
github.com/elastic/crd-ref-docs v0.1.0/go.mod h1:X83mMBdJt05heJUYiS3T0yJ/JkCuliuhSUNav5Gjo/U=
3341
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
@@ -100,6 +108,8 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm
100108
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
101109
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
102110
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
111+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
112+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
103113
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
104114
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
105115
github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
@@ -147,6 +157,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
147157
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
148158
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
149159
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
160+
github.com/neuralmagic/kvcache-manager v0.0.0-20250422070607-db465f8aaa71 h1:GGHC75iOo4KpJECtCz7iu5K6Q5fkXs93QpiMgJz/cIY=
161+
github.com/neuralmagic/kvcache-manager v0.0.0-20250422070607-db465f8aaa71/go.mod h1:jzOr2R8AIGVbI/nVE7dLPr6f6kPI93ftELtJbwsaH3A=
150162
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
151163
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
152164
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@@ -172,6 +184,8 @@ github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA
172184
github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18=
173185
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
174186
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
187+
github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
188+
github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
175189
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
176190
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
177191
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -226,8 +240,8 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
226240
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
227241
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
228242
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
229-
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
230-
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
243+
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
244+
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
231245
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
232246
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
233247
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
@@ -238,31 +252,29 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
238252
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
239253
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
240254
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
241-
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
242-
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
243-
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
244-
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
255+
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
256+
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
245257
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
246258
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
247259
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
248260
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
249261
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
250-
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
251-
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
262+
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
263+
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
252264
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
253265
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
254266
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
255267
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
256268
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
257269
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
258270
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
259-
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
260-
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
271+
golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o=
272+
golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw=
261273
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
262274
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
263275
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
264-
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
265-
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
276+
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
277+
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
266278
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
267279
golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
268280
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

pkg/epp/backend/metrics/fake.go

+2
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ func (fpm *FakePodMetrics) String() string {
4040
func (fpm *FakePodMetrics) GetPod() *Pod {
4141
return fpm.Pod
4242
}
43+
4344
func (fpm *FakePodMetrics) GetMetrics() *Metrics {
4445
return fpm.Metrics
4546
}
47+
4648
func (fpm *FakePodMetrics) UpdatePod(pod *corev1.Pod) {
4749
fpm.Pod = toInternalPod(pod)
4850
}

pkg/epp/backend/metrics/metrics.go

-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ func (p *PodMetricsClientImpl) FetchMetrics(
4747
existing *Metrics,
4848
port int32,
4949
) (*Metrics, error) {
50-
5150
// Currently the metrics endpoint is hard-coded, which works with vLLM.
5251
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
5352
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"

pkg/epp/backend/metrics/metrics_test.go

-3
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ func makeMetricFamily(name string, metrics ...*dto.Metric) *dto.MetricFamily {
5858
// --- Tests ---
5959

6060
func TestGetMetric(t *testing.T) {
61-
6261
metricFamilies := map[string]*dto.MetricFamily{
6362
"metric1": makeMetricFamily("metric1",
6463
makeMetric(map[string]string{"label1": "value1"}, 1.0, 1000),
@@ -166,7 +165,6 @@ func TestGetMetric(t *testing.T) {
166165

167166
for _, tt := range tests {
168167
t.Run(tt.name, func(t *testing.T) {
169-
170168
gotMetric, err := p.getMetric(metricFamilies, tt.spec)
171169

172170
if tt.wantError {
@@ -240,7 +238,6 @@ func TestLabelsMatch(t *testing.T) {
240238
}
241239

242240
func TestGetLatestLoraMetric(t *testing.T) {
243-
244241
testCases := []struct {
245242
name string
246243
metricFamilies map[string]*dto.MetricFamily

pkg/epp/backend/metrics/pod_metrics_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,12 @@ type fakeDataStore struct{}
8888
func (f *fakeDataStore) PoolGet() (*v1alpha2.InferencePool, error) {
8989
return &v1alpha2.InferencePool{Spec: v1alpha2.InferencePoolSpec{TargetPortNumber: 8000}}, nil
9090
}
91+
9192
func (f *fakeDataStore) PodGetAll() []PodMetrics {
9293
// Not implemented.
9394
return nil
9495
}
96+
9597
func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics {
9698
// Not implemented.
9799
return nil

pkg/epp/controller/inferencemodel_reconciler_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,6 @@ func TestInferenceModelReconciler(t *testing.T) {
227227
if diff := diffStore(ds, diffStoreParams{wantPool: pool, wantModels: test.wantModels}); diff != "" {
228228
t.Errorf("Unexpected diff (+got/-want): %s", diff)
229229
}
230-
231230
})
232231
}
233232
}

pkg/epp/datastore/datastore.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ const (
4141
sessionKeepAliveCheckFrequency = 15 * time.Minute // How often to check for overly idle sessions
4242
)
4343

44-
var (
45-
errPoolNotSynced = errors.New("InferencePool is not initialized in data store")
46-
)
44+
var errPoolNotSynced = errors.New("InferencePool is not initialized in data store")
4745

4846
// The datastore is a local cache of relevant data for the given InferencePool (currently all pulled from k8s-api)
4947
type Datastore interface {

pkg/epp/datastore/datastore_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,6 @@ func TestModel(t *testing.T) {
204204
existing := ds.ModelDelete(types.NamespacedName{Name: model1ts.Name, Namespace: model1ts.Namespace})
205205
got := ds.ModelGet(tsModel)
206206
return existing != nil && got == nil
207-
208207
},
209208
wantOpResult: true,
210209
wantModels: []*v1alpha2.InferenceModel{model2chat},
@@ -226,7 +225,6 @@ func TestModel(t *testing.T) {
226225
if diff := testutil.DiffModelLists(test.wantModels, ds.ModelGetAll()); diff != "" {
227226
t.Errorf("Unexpected models diff: %s", diff)
228227
}
229-
230228
})
231229
}
232230
}

pkg/epp/handlers/request.go

+7
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import (
3333
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
3434
)
3535

36+
const emptyPrompt = ""
37+
3638
// HandleRequestBody always returns the requestContext even in the error case, as the request context is used in error handling.
3739
func (s *StreamingServer) HandleRequestBody(
3840
ctx context.Context,
@@ -70,6 +72,7 @@ func (s *StreamingServer) HandleRequestBody(
7072
ResolvedTargetModel: modelName,
7173
Critical: modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical,
7274
SessionID: reqCtx.SessionID,
75+
Prompt: emptyPrompt,
7376
}
7477
logger.V(logutil.DEBUG).Info("LLM request assembled", "request", llmReq, "session id", reqCtx.SessionID)
7578

@@ -78,6 +81,10 @@ func (s *StreamingServer) HandleRequestBody(
7881
if llmReq.Model != llmReq.ResolvedTargetModel {
7982
requestBodyMap["model"] = llmReq.ResolvedTargetModel
8083
}
84+
// Extract prompt from the request body.
85+
if prompt, ok := requestBodyMap["prompt"].(string); ok {
86+
llmReq.Prompt = prompt
87+
}
8188

8289
requestBodyBytes, err = json.Marshal(requestBodyMap)
8390
if err != nil {

pkg/epp/scheduling/plugins/filter.go

-1
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,6 @@ var LoRAAffinityFilter = &Filter{
217217
// - Filtered slice of pod metrics based on affinity and availability
218218
// - Error if any issues occur during filtering
219219
func loRASoftAffinityFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
220-
221220
// Pre-allocate slices with estimated capacity
222221
filtered_affinity := make([]types.Pod, 0, len(pods))
223222
filtered_available := make([]types.Pod, 0, len(pods))

0 commit comments

Comments
 (0)