From f22bbd35ddc8903647169e07961b32961e7ed4c7 Mon Sep 17 00:00:00 2001 From: tanmayv25 Date: Mon, 24 Nov 2025 11:12:54 -0800 Subject: [PATCH] Increase the failure threshold for k8s dsr1 trtllm wideep deploy.yaml --- recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml b/recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml index 936bcb5bee..1a664ee685 100644 --- a/recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml +++ b/recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml @@ -172,7 +172,7 @@ spec: initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 - failureThreshold: 500 + failureThreshold: 600 volumeMounts: - name: prefill-config-volume mountPath: /config @@ -230,7 +230,7 @@ spec: initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 - failureThreshold: 500 + failureThreshold: 600 volumeMounts: - name: decode-config-volume mountPath: /config