NVIDIA · lancelly · Nov 21, 2025
@@ -51,6 +51,12 @@ def _view_as(buffer: torch.Tensor, target_shape: list[int],
 
     def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
                    buffer_name: str, reserve_buffer: bool):
+        """Return a reusable buffer view for the requested shape/dtype.
+
+        The returned tensor is backed by an underlying `torch.uint8` buffer. When
+        no suitable buffer exists in the pool, a new tensor is created via
+        `torch.empty`, so its contents are uninitialized; write before reading.
+        """
 
         # all buffers are allocated with 1 byte element size
         required_memory_size = math.prod(tensor_shape) * dtype.itemsize
@@ -91,7 +97,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
         new_buffer_tensor = None
         try:
             with torch.cuda.memory.use_mem_pool(get_shared_pool()):
-                new_buffer_tensor = torch.zeros((required_memory_size, ),
+                new_buffer_tensor = torch.empty((required_memory_size, ),
                                                 device='cuda',
                                                 dtype=torch.uint8)
         except Exception as ex:
@@ -101,7 +107,7 @@ def get_buffer(self, tensor_shape: list[int], dtype: torch.dtype,
             )
             # if exception happens during allocating memory from shared pool, retry
             # to allocate from default pool
-            new_buffer_tensor = torch.zeros((required_memory_size, ),
+            new_buffer_tensor = torch.empty((required_memory_size, ),
                                             device='cuda',
                                             dtype=torch.uint8)