diff --git a/backend/charts/deepgram-self-hosted/dev_omi_values.yaml b/backend/charts/deepgram-self-hosted/dev_omi_values.yaml index 1b803ea579..8553b46be3 100644 --- a/backend/charts/deepgram-self-hosted/dev_omi_values.yaml +++ b/backend/charts/deepgram-self-hosted/dev_omi_values.yaml @@ -60,6 +60,9 @@ scaling: # Discuss a reasoanble value with your Deepgram Account Representative # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling requestCapacityRatio: + behavior: + scaleUp: + stabilizationWindowSeconds: 120 # Wait 2 minutes before scaling up api: image: @@ -114,7 +117,7 @@ engine: gpu: 1 limits: memory: "40Gi" - cpu: "8000m" + cpu: "12000m" gpu: 1 # Discuss a reasonable value with your Deepgram Account Representative # If not using autoscaling, can be left empty, but must be set if using diff --git a/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md b/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md index def18af9fd..353c27beac 100644 --- a/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md +++ b/backend/charts/deepgram-self-hosted/how_to_deploy_deepgram_self_hosted_on_gke.md @@ -242,7 +242,7 @@ gcloud container node-pools create engine-pool \ --num-nodes 1 \ --enable-autoscaling \ --max-nodes 8 \ - --machine-type g2-standard-8 \ + --machine-type g2-standard-12 \ --accelerator=type=nvidia-l4,count=1,gpu-driver-version=latest \ --node-labels k8s.deepgram.com/node-type=engine diff --git a/backend/charts/deepgram-self-hosted/prod_omi_values.yaml b/backend/charts/deepgram-self-hosted/prod_omi_values.yaml index 77f3335b6d..f91001cd3c 100644 --- a/backend/charts/deepgram-self-hosted/prod_omi_values.yaml +++ b/backend/charts/deepgram-self-hosted/prod_omi_values.yaml @@ -60,6 +60,9 @@ scaling: # Discuss a reasoanble value with your Deepgram Account Representative # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling requestCapacityRatio: + behavior: + scaleUp: + stabilizationWindowSeconds: 120 # Wait 2 minutes before scaling up api: image: @@ -114,7 +117,7 @@ engine: gpu: 1 limits: memory: "40Gi" - cpu: "8000m" + cpu: "12000m" gpu: 1 # Discuss a reasonable value with your Deepgram Account Representative # If not using autoscaling, can be left empty, but must be set if using