Skip to content

Commit 0ee71d9

Browse files
authored
[https://nvbugs/5606166][fix] AutoDeploy: use tuples for cudagraph shape lookup (#8658)
Signed-off-by: Lucas Liebenwein <[email protected]>
1 parent a09b38a commit 0ee71d9

File tree

2 files changed: +13 −3 lines changed

tensorrt_llm/_torch/auto_deploy/compile/backends/torch_cudagraph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def capture_graph(self, *args, **kwargs):
         args, kwargs = self._in_spec.unflatten(inputs_truncated + args_static)

         # capture graph for truncated inputs
-        combined_shape = sum((input.shape for input in inputs_truncated), start=())
+        combined_shape = sum((tuple(input.shape) for input in inputs_truncated), start=())
         self.cudagraphs[combined_shape] = self._capture_one_graph(*args, **kwargs)

     def forward(self, *args, **kwargs) -> Any:
@@ -157,7 +157,7 @@ def forward(self, *args, **kwargs) -> Any:

         # Calculate rounded-up shapes for each input
         rounded_shapes = [
-            (self.round_to_cuda_batch_size(input.shape[0]),) + input.shape[1:]
+            (self.round_to_cuda_batch_size(input.shape[0]),) + tuple(input.shape[1:])
             for input in args_batched
         ]
         combined_shape = sum(rounded_shapes, start=())

tests/unittest/_torch/auto_deploy/unit/singlegpu/compile/test_cuda_graph_batch_sizes.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
 class TestCudaGraphBatchSizes:
     """Test class for CUDA graph batch size handling."""

+    @staticmethod
+    def _raise_error_for_forward(*args, **kwargs):
+        raise RuntimeError("forward method should not be called")
+
     @pytest.fixture
     def simple_model_and_inputs(self):
         """Create a simple model and inputs for testing."""
@@ -192,7 +196,13 @@ def test_forward_uses_cuda_graph_for_valid_batch_sizes(self, simple_model_and_in
         test_input = data["input_tensor"][:batch_size]

         with torch.inference_mode():
-            output = captured_graph.forward(test_input)
+            # temporarily remove model forward to ensure that the captured graph is used
+            original_forward = captured_graph.model.forward
+            captured_graph.model.forward = self._raise_error_for_forward
+            try:
+                output = captured_graph.forward(test_input)
+            finally:
+                captured_graph.model.forward = original_forward

         # Should get valid output
         assert output is not None

0 commit comments

Comments
 (0)