
Commit 17a23e5

[fix] Fix the kind environment and set gateway service to be NodePort
Signed-off-by: Kfir Toledo <[email protected]>
1 parent 937bb50 commit 17a23e5

11 files changed: 33 additions & 28 deletions

DEVELOPMENT.md

Lines changed: 11 additions & 11 deletions
@@ -191,7 +191,7 @@ export REGISTRY_SECRET=anna-pull-secret
 
 Set the `VLLM_MODE` environment variable based on which version of vLLM you want to deploy:
 
-* `vllm-sim`: Lightweight simulator for simple environments (defult).
+* `vllm-sim`: Lightweight simulator for simple environments (default).
 * `vllm`: Full vLLM model server, using GPU/CPU for inferencing
 * `vllm-p2p`: Full vLLM with LMCache P2P support for enable KV-Cache aware routing
 
@@ -224,19 +224,19 @@ kubectl port-forward service/inference-gateway 8080:80
 
 And making requests with `curl`:
 
-- vllm-sim
+**vllm-sim:**
 
-```bash
-curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: application/json' \
-  -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
-```
+```bash
+curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: application/json' \
+  -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
+```
 
-- vllm or vllm-p2p
+**vllm or vllm-p2p:**
 
-```bash
-curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: application/json' \
-  -d '{"model":"meta-llama/Llama-3.1-8B-Instruct","prompt":"hi","max_tokens":10,"temperature":0}' | jq
-```
+```bash
+curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: application/json' \
+  -d '{"model":"meta-llama/Llama-3.1-8B-Instruct","prompt":"hi","max_tokens":10,"temperature":0}' | jq
+```
 
 #### Environment Configurateion
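For quick reference, the documented flow above amounts to choosing a `VLLM_MODE`, port-forwarding the gateway, and sending a completion request. A minimal smoke-test sketch, using only values that appear in the DEVELOPMENT.md diff above (the backgrounded port-forward and the `sleep` are illustrative assumptions):

```bash
# Sketch only: model name and endpoint come from the diff above.
export VLLM_MODE=vllm-sim          # or: vllm, vllm-p2p

kubectl port-forward service/inference-gateway 8080:80 &
sleep 2                            # give the port-forward a moment to bind

curl -s -w '\n' http://localhost:8080/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
```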

deploy/components/inference-gateway/deployments.yaml

Lines changed: 0 additions & 8 deletions
@@ -48,11 +48,3 @@ spec:
           service: inference-extension
         initialDelaySeconds: 5
         periodSeconds: 10
-        env:
-        - name: KVCACHE_INDEXER_REDIS_ADDR
-          value: ${REDIS_HOST}:${REDIS_PORT}
-        - name: HF_TOKEN
-          valueFrom:
-            secretKeyRef:
-              name: ${HF_SECRET_NAME}
-              key: ${HF_SECRET_KEY}

deploy/components/inference-gateway/kustomization.yaml

Lines changed: 0 additions & 2 deletions
@@ -26,8 +26,6 @@ resources:
 - deployments.yaml
 - gateways.yaml
 - httproutes.yaml
-- secret.yaml
-
 
 images:
 - name: quay.io/vllm-d/gateway-api-inference-extension/epp

deploy/components/vllm/kustomization.yaml

Lines changed: 1 addition & 1 deletion
@@ -33,4 +33,4 @@ images:
 configMapGenerator:
 - name: vllm-model-config
   literals:
-  - MODEL_NAME=${MODEL_NAME}
+  - MODEL_NAME=${MODEL_NAME}

deploy/environments/dev/kubernetes-kgateway/gateway-parameters.yaml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ spec:
       runAsNonRoot: true
       runAsUser: "${PROXY_UID}"
     service:
-      type: LoadBalancer
+      type: ${GATEWAY_SERVICE_TYPE}
     extraLabels:
       gateway: custom
   podTemplate:
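With the service type now parameterized, the NodePort behavior named in the commit title presumably comes from exporting the variable before the manifests are rendered. A hedged sketch (the variable name is taken from the diff; NodePort is the value implied by the commit message, and 30080 is the host port mapping noted in `scripts/kind-dev-env.sh` below):

```bash
# Sketch: NodePort for kind-based dev environments, so the gateway is
# presumably reachable via the kind node's mapped host port (30080).
export GATEWAY_SERVICE_TYPE=NodePort
# A cloud environment could keep the previous behavior instead:
# export GATEWAY_SERVICE_TYPE=LoadBalancer
```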

deploy/environments/dev/kubernetes-kgateway/kustomization.yaml

Lines changed: 2 additions & 1 deletion
@@ -4,6 +4,7 @@ kind: Kustomization
 namespace: ${NAMESPACE}
 
 resources:
+- secret.yaml
 - ../../../components/inference-gateway/
 - gateway-parameters.yaml
 
@@ -14,4 +15,4 @@ images:
 
 patches:
 - path: patch-deployments.yaml
-- path: patch-gateways.yaml
+- path: patch-gateways.yaml

deploy/environments/dev/kubernetes-kgateway/patch-deployments.yaml

Lines changed: 8 additions & 0 deletions
@@ -22,3 +22,11 @@ spec:
         - "9002"
         - -grpcHealthPort
         - "9003"
+        env:
+        - name: KVCACHE_INDEXER_REDIS_ADDR
+          value: ${REDIS_HOST}:${REDIS_PORT}
+        - name: HF_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: hf-token
+              key: ${HF_SECRET_KEY}
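The patch references a Secret literally named `hf-token` while its key stays parameterized. A hypothetical way to create it, assuming `${HF_SECRET_KEY}` resolves to `token` and the Hugging Face token is held in an `HF_TOKEN` shell variable (neither value is confirmed by this diff):

```bash
# Hypothetical: create the "hf-token" Secret that the patched Deployment reads.
# The key name "token" is an assumed value for ${HF_SECRET_KEY}.
kubectl create secret generic hf-token \
  --namespace "${NAMESPACE}" \
  --from-literal=token="${HF_TOKEN}"
```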

deploy/environments/dev/kubernetes-vllm/vllm/kustomization.yaml

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ resources:
 - ../../../../components/vllm/
 
 images:
-- name: quay.io/vllm-d/vllm-d-dev:0.0.2
+- name: quay.io/vllm-d/vllm-d-dev
   newName: ${VLLM_IMAGE}
   newTag: ${VLLM_TAG}
 
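Dropping the `:0.0.2` suffix from the `name` matcher presumably lets kustomize's `images:` transformer match the base's image reference by name, regardless of tag, before applying `newName`/`newTag`. A hedged way to check the rendered output (the build path and the exported values are assumptions for illustration):

```bash
# Assumed values; confirm the image transformer rewrote the vLLM reference.
export VLLM_IMAGE="quay.io/vllm-d/vllm-d-dev"
export VLLM_TAG="0.0.2"
kustomize build deploy/environments/dev/kubernetes-vllm/vllm | envsubst | grep "image:"
```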

scripts/kind-dev-env.sh

Lines changed: 6 additions & 1 deletion
@@ -25,6 +25,11 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Set the host port to map to the Gateway's inbound port (30080)
 : "${GATEWAY_HOST_PORT:=30080}"
 
+# Set the inference pool name for the deployment
+export POOL_NAME="${POOL_NAME:-vllm-llama3-8b-instruct}"
+
+# Set the model name to deploy
+export MODEL_NAME="${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}"
 # ------------------------------------------------------------------------------
 # Setup & Requirement Checks
 # ------------------------------------------------------------------------------
@@ -113,7 +118,7 @@ kustomize build --enable-helm deploy/components/crds-kgateway |
 
 # Deploy the environment to the "default" namespace
 kustomize build --enable-helm deploy/environments/dev/kind-kgateway \
-  | sed "s/REPLACE_NAMESPACE/${PROJECT_NAMESPACE}/gI" \
+  | envsubst | sed "s/REPLACE_NAMESPACE/${PROJECT_NAMESPACE}/gI" \
   | kubectl --context ${KUBE_CONTEXT} apply -f -
 
 # Wait for all control-plane pods to be ready
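The new `envsubst` stage is what fills in the exported `POOL_NAME`, `MODEL_NAME`, and other `${VAR}` placeholders in the rendered manifests before `kubectl apply`. A small self-contained illustration of that substitution behavior (the echoed manifest line is invented for the example; the default value matches the script above):

```bash
# envsubst replaces ${VAR} references with the values of exported variables.
export MODEL_NAME="${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}"
echo '  - MODEL_NAME=${MODEL_NAME}' | envsubst
# prints:   - MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
```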
