From 9b211b2692fe091a348f3cb65f4c6a6662139857 Mon Sep 17 00:00:00 2001 From: Lanyu Liao Date: Fri, 21 Nov 2025 01:39:50 -0800 Subject: [PATCH] use torch.empty instead of torch.zeros in get_buffer Signed-off-by: Lanyu Liao --- tensorrt_llm/_torch/memory_buffer_utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/_torch/memory_buffer_utils.py b/tensorrt_llm/_torch/memory_buffer_utils.py index adf014a9872..9d0443fe848 100644 --- a/tensorrt_llm/_torch/memory_buffer_utils.py +++ b/tensorrt_llm/_torch/memory_buffer_utils.py @@ -51,6 +51,12 @@ def _view_as(buffer: torch.Tensor, target_shape: list[int], def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, buffer_name: str, reserve_buffer: bool): + """Return a reusable buffer view for the requested shape/dtype. + + The returned tensor is backed by an underlying `torch.uint8` buffer. When + no suitable buffer exists in the pool, a new tensor is created via + `torch.empty`, so its contents are uninitialized; callers must fully overwrite the data before reading it. + """ # all buffers are allocated with 1 byte element size required_memory_size = math.prod(tensor_shape) * dtype.itemsize @@ -91,7 +97,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, new_buffer_tensor = None try: with torch.cuda.memory.use_mem_pool(get_shared_pool()): - new_buffer_tensor = torch.zeros((required_memory_size, ), + new_buffer_tensor = torch.empty((required_memory_size, ), device='cuda', dtype=torch.uint8) except Exception as ex: @@ -101,7 +107,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, ) # if exception happens during allocating memory from shared pool, retry # to allocate from default pool - new_buffer_tensor = torch.zeros((required_memory_size, ), + new_buffer_tensor = torch.empty((required_memory_size, ), device='cuda', dtype=torch.uint8)