diff --git a/backend/charts/deepgram-self-hosted/dev_omi_values.yaml b/backend/charts/deepgram-self-hosted/dev_omi_values.yaml
index 1b803ea579..8553b46be3 100644
--- a/backend/charts/deepgram-self-hosted/dev_omi_values.yaml
+++ b/backend/charts/deepgram-self-hosted/dev_omi_values.yaml
@@ -60,6 +60,9 @@ scaling:
         # Discuss a reasoanble value with your Deepgram Account Representative
         # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling
         requestCapacityRatio:
+      behavior:
+        scaleUp:
+          stabilizationWindowSeconds: 120  # Damp spikes: scale up only to the lowest replica count recommended over the last 2 minutes
 
 api:
   image:
@@ -114,7 +117,7 @@ engine:
       gpu: 1
     limits:
       memory: "40Gi"
-      cpu: "8000m"
+      cpu: "12000m"
       gpu: 1
   # Discuss a reasonable value with your Deepgram Account Representative
   # If not using autoscaling, can be left empty, but must be set if using
diff --git a/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md b/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md
index def18af9fd..353c27beac 100644
--- a/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md
+++ b/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md
@@ -242,7 +242,7 @@ gcloud container node-pools create engine-pool \
     --num-nodes 1 \
     --enable-autoscaling \
     --max-nodes 8 \
-    --machine-type g2-standard-8 \
+    --machine-type g2-standard-12 \
     --accelerator=type=nvidia-l4,count=1,gpu-driver-version=latest \
     --node-labels k8s.deepgram.com/node-type=engine
   
diff --git a/backend/charts/deepgram-self-hosted/prod_omi_values.yaml b/backend/charts/deepgram-self-hosted/prod_omi_values.yaml
index 77f3335b6d..f91001cd3c 100644
--- a/backend/charts/deepgram-self-hosted/prod_omi_values.yaml
+++ b/backend/charts/deepgram-self-hosted/prod_omi_values.yaml
@@ -60,6 +60,9 @@ scaling:
         # Discuss a reasoanble value with your Deepgram Account Representative
         # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling
         requestCapacityRatio:
+      behavior:
+        scaleUp:
+          stabilizationWindowSeconds: 120  # Damp spikes: scale up only to the lowest replica count recommended over the last 2 minutes
 
 api:
   image:
@@ -114,7 +117,7 @@ engine:
       gpu: 1
     limits:
       memory: "40Gi"
-      cpu: "8000m"
+      cpu: "12000m"
       gpu: 1
   # Discuss a reasonable value with your Deepgram Account Representative
   # If not using autoscaling, can be left empty, but must be set if using