forked from kubernetes-sigs/gateway-api-inference-extension
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathdeployments.yaml
58 lines (58 loc) · 1.38 KB
/
deployments.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
---
# Deployment for the endpoint picker ("epp") container.
#
# The ${...} tokens below are placeholders; this file does not show what
# expands them (presumably envsubst or a similar text-substitution step run
# before `kubectl apply` -- confirm against the deploy tooling).
# Every placeholder is quoted so that an empty or numeric-looking
# substitution still parses as a YAML string.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
  labels:
    app: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      # Must match the pod template labels below.
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      # Longer than the Kubernetes default of 30s.
      # NOTE(review): presumably sized to let in-flight requests drain --
      # confirm the intended value with the component owners.
      terminationGracePeriodSeconds: 130
      containers:
        - name: epp
          # NOTE(review): a mutable `:latest` tag combined with IfNotPresent
          # means a node that already holds a cached image will not pull a
          # newer build. Consider pinning a version tag or digest.
          image: quay.io/vllm-d/gateway-api-inference-extension/epp:latest
          imagePullPolicy: IfNotPresent
          args:
            - "-poolName"
            - "${POOL_NAME}"
            # Log verbosity level.
            - "-v"
            - "4"
            - "--zap-encoder"
            - "json"
            # Must stay in sync with the first containerPort below.
            - "-grpcPort"
            - "9002"
            # Must stay in sync with the probe port below.
            - "-grpcHealthPort"
            - "9003"
          ports:
            # Same port number as -grpcPort.
            - containerPort: 9002
            # Same port number as -grpcHealthPort.
            - containerPort: 9003
            - name: metrics
              containerPort: 9090
          # Liveness and readiness both use the gRPC health endpoint on the
          # -grpcHealthPort port, with identical timing.
          livenessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
          readinessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
          env:
            # host:port pair assembled from two placeholders.
            - name: KVCACHE_INDEXER_REDIS_ADDR
              value: "${REDIS_HOST}:${REDIS_PORT}"
            # Read from a Secret rather than inlined in the manifest; the
            # Secret name and key are both supplied via placeholders.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: "${HF_SECRET_NAME}"
                  key: "${HF_SECRET_KEY}"