Skip to content

Commit 6dd2fcd

Browse files
authored
[https://nvbugs/5629833][fix] Don't fill tensors with 0 (#9296)
Signed-off-by: Hui Gao <[email protected]>
1 parent cddc754 commit 6dd2fcd

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

tensorrt_llm/_torch/memory_buffer_utils.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ def _view_as(buffer: torch.Tensor, target_shape: list[int],
5151

5252
def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
5353
buffer_name: str, reserve_buffer: bool):
54+
"""Return a reusable buffer view for the requested shape/dtype.
55+
The returned tensor is backed by an underlying `torch.uint8` buffer. When
56+
no suitable buffer exists in the pool, a new tensor is created via
57+
`torch.empty`, so its contents are uninitialized; callers that read the buffer before writing it must overwrite the data first.
58+
"""
5459

5560
# all buffers are allocated with 1 byte element size
5661
required_memory_size = math.prod(tensor_shape) * dtype.itemsize
@@ -91,7 +96,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
9196
new_buffer_tensor = None
9297
try:
9398
with torch.cuda.memory.use_mem_pool(get_shared_pool()):
94-
new_buffer_tensor = torch.zeros((required_memory_size, ),
99+
new_buffer_tensor = torch.empty((required_memory_size, ),
95100
device='cuda',
96101
dtype=torch.uint8)
97102
except Exception as ex:
@@ -101,7 +106,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
101106
)
102107
# if exception happens during allocating memory from shared pool, retry
103108
# to allocate from default pool
104-
new_buffer_tensor = torch.zeros((required_memory_size, ),
109+
new_buffer_tensor = torch.empty((required_memory_size, ),
105110
device='cuda',
106111
dtype=torch.uint8)
107112

0 commit comments

Comments (0)