Skip to content

Commit 0e4c83b

Browse files
committed
[chore] Improve documentation for inflight request IDs.
Signed-off-by: Robin Kobus <[email protected]>
1 parent 767d19d commit 0e4c83b

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -233,6 +233,8 @@ def __init__(self,
233233
| None] = [None] * self.num_micro_batches
234234
self.send_handles = [None] * self.num_micro_batches
235235

236+
# Set of request IDs that are currently in flight across all micro batches.
237+
# The scheduler will avoid scheduling requests that are already in flight.
236238
self.inflight_req_ids = ReqIdsSet()
237239

238240
# During warmup, we don't enable the profiler
@@ -2484,7 +2486,13 @@ def _pause_requests(self, requests_to_pause):
24842486
self._terminate_request(req)
24852487

24862488
def _add_inflight_ids(self, scheduled_requests):
2487-
"""Add reqids of current requests to self.inflight_req_ids."""
2489+
"""Add request IDs of current requests to self.inflight_req_ids.
2490+
2491+
Non-final context chunks are not added to the inflight set, so the scheduler can keep scheduling further
2492+
context chunks while earlier ones are in the PP pipeline. Only context requests that finish the context phase
2493+
are inserted into the inflight set and collected into finished_ctx_reqs.
2494+
All generation requests are still inserted into the inflight set.
2495+
"""
24882496
finished_ctx_reqs = []
24892497
for req in scheduled_requests.context_requests:
24902498
if req.is_last_context_chunk:
@@ -2501,7 +2509,11 @@ def _add_inflight_ids(self, scheduled_requests):
25012509
return finished_ctx_reqs
25022510

25032511
def _remove_inflight_ids(self, batch_state: BatchStatePP):
2504-
"""Remove reqids of current requests from self.inflight_req_ids."""
2512+
"""Remove request IDs of current requests from self.inflight_req_ids.
2513+
2514+
Context IDs are erased from the inflight set using batch_state.finished_ctx_reqs.
2515+
Generation IDs are erased using batch_state.sample_state.scheduled_requests.generation_requests.
2516+
"""
25052517
for req in batch_state.finished_ctx_reqs:
25062518
logger.debug(
25072519
f"Context request with ID {req.request_id} removed from DECODER model inflight set"

0 commit comments

Comments
 (0)