[chore] Improve documentation for inflight request IDs.

Funatiq · Funatiq · commit 0e4c83bd9545 · 2025-11-21T12:32:12.000Z
Signed-off-by: Robin Kobus <19427718+Funatiq@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -233,6 +233,8 @@ def __init__(self,
                                  | None] = [None] * self.num_micro_batches
         self.send_handles = [None] * self.num_micro_batches
 
+        # Set of request IDs that are currently in flight across all micro batches.
+        # The scheduler will avoid scheduling requests that are already in flight.
         self.inflight_req_ids = ReqIdsSet()
 
         # During warmup, we don't enable the profiler
@@ -2484,7 +2486,13 @@ def _pause_requests(self, requests_to_pause):
             self._terminate_request(req)
 
     def _add_inflight_ids(self, scheduled_requests):
-        """Add reqids of current requests to self.inflight_req_ids."""
+        """Add request IDs of current requests to self.inflight_req_ids.
+
+        Non-final context chunks are not added to the inflight set, so the scheduler can keep scheduling further
+        context chunks while earlier ones are in the PP pipeline. Only context requests that finish the context phase
+        are inserted into the inflight set and collected into finished_ctx_reqs.
+        All generation requests are still inserted into the inflight set.
+        """
         finished_ctx_reqs = []
         for req in scheduled_requests.context_requests:
             if req.is_last_context_chunk:
@@ -2501,7 +2509,11 @@ def _add_inflight_ids(self, scheduled_requests):
         return finished_ctx_reqs
 
     def _remove_inflight_ids(self, batch_state: BatchStatePP):
-        """Remove reqids of current requests from self.inflight_req_ids."""
+        """Remove request IDs of current requests from self.inflight_req_ids.
+
+        Context IDs are erased from the inflight set using batch_state.finished_ctx_reqs.
+        Generation IDs are erased using batch_state.sample_state.scheduled_requests.generation_requests.
+        """
         for req in batch_state.finished_ctx_reqs:
             logger.debug(
                 f"Context request with ID {req.request_id} removed from DECODER model inflight set"