Enhance DG self-hosted config

BasedHardware · Mar 8, 2025 · 224222d · 224222d
1 parent 90fd2e4
commit 224222d
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 3 deletions.
diff --git a/backend/charts/deepgram-self-hosted/dev_omi_values.yaml b/backend/charts/deepgram-self-hosted/dev_omi_values.yaml
@@ -60,6 +60,9 @@ scaling:
         # Discuss a reasonable value with your Deepgram Account Representative
         # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling
         requestCapacityRatio:
+      behavior:
+        scaleUp:
+          stabilizationWindowSeconds: 120  # Scale up only after higher demand has persisted for 2 minutes
 
 api:
   image:
@@ -114,7 +117,7 @@ engine:
       gpu: 1
     limits:
       memory: "40Gi"
-      cpu: "8000m"
+      cpu: "12000m"
       gpu: 1
   # Discuss a reasonable value with your Deepgram Account Representative
   # If not using autoscaling, can be left empty, but must be set if using

diff --git a/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md b/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md
@@ -242,7 +242,7 @@ gcloud container node-pools create engine-pool \
     --num-nodes 1 \
     --enable-autoscaling \
     --max-nodes 8 \
-    --machine-type g2-standard-8 \
+    --machine-type g2-standard-12 \
     --accelerator=type=nvidia-l4,count=1,gpu-driver-version=latest \
     --node-labels k8s.deepgram.com/node-type=engine
 

diff --git a/backend/charts/deepgram-self-hosted/prod_omi_values.yaml b/backend/charts/deepgram-self-hosted/prod_omi_values.yaml
@@ -60,6 +60,9 @@ scaling:
         # Discuss a reasonable value with your Deepgram Account Representative
         # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling
         requestCapacityRatio:
+      behavior:
+        scaleUp:
+          stabilizationWindowSeconds: 120  # Scale up only after higher demand has persisted for 2 minutes
 
 api:
   image:
@@ -114,7 +117,7 @@ engine:
       gpu: 1
     limits:
       memory: "40Gi"
-      cpu: "8000m"
+      cpu: "12000m"
       gpu: 1
   # Discuss a reasonable value with your Deepgram Account Representative
   # If not using autoscaling, can be left empty, but must be set if using