From 9b211b2692fe091a348f3cb65f4c6a6662139857 Mon Sep 17 00:00:00 2001 From: Lanyu Liao Date: Fri, 21 Nov 2025 01:39:50 -0800 Subject: [PATCH] use torch.empty instead of torch.zeros in get_buffer Signed-off-by: Lanyu Liao --- tensorrt_llm/_torch/memory_buffer_utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/_torch/memory_buffer_utils.py b/tensorrt_llm/_torch/memory_buffer_utils.py index adf014a9872..9d0443fe848 100644 --- a/tensorrt_llm/_torch/memory_buffer_utils.py +++ b/tensorrt_llm/_torch/memory_buffer_utils.py @@ -51,6 +51,12 @@ def _view_as(buffer: torch.Tensor, target_shape: list[int], def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, buffer_name: str, reserve_buffer: bool): + """Return a reusable buffer view for the requested shape/dtype. + + The returned tensor is backed by an underlying `torch.uint8` buffer. When + no suitable buffer exists in the pool, a new tensor is created via + `torch.empty`, so its contents are uninitialized; callers must fully overwrite the data before reading it. + """ # all buffers are allocated with 1 byte element size required_memory_size = math.prod(tensor_shape) * dtype.itemsize @@ -91,7 +97,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, new_buffer_tensor = None try: with torch.cuda.memory.use_mem_pool(get_shared_pool()): - new_buffer_tensor = torch.zeros((required_memory_size, ), + new_buffer_tensor = torch.empty((required_memory_size, ), device='cuda', dtype=torch.uint8) except Exception as ex: @@ -101,7 +107,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype, ) # if exception happens during allocating memory from shared pool, retry # to allocate from default pool - new_buffer_tensor = torch.zeros((required_memory_size, ), + new_buffer_tensor = torch.empty((required_memory_size, ), device='cuda', dtype=torch.uint8)