Skip to content

Commit 1da1751

Browse files
committed
[chore] Improve documentation for inflight request IDs.
Signed-off-by: Robin Kobus <[email protected]>
1 parent 58b7cdc commit 1da1751

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ def __init__(self,
233233
| None] = [None] * self.num_micro_batches
234234
self.send_handles = [None] * self.num_micro_batches
235235

236+
# Set of request IDs that are currently in flight across all micro batches.
237+
# The scheduler will avoid scheduling requests that are already in flight.
236238
self.inflight_req_ids = ReqIdsSet()
237239

238240
# During warmup, we don't enable the profiler
@@ -2494,7 +2496,13 @@ def _pause_requests(self, requests_to_pause):
24942496
self._terminate_request(req)
24952497

24962498
def _add_inflight_ids(self, scheduled_requests):
2497-
"""Add reqids of current requests to self.inflight_req_ids."""
2499+
"""Add request IDs of current requests to self.inflight_req_ids.
2500+
2501+
Non-final context chunks are not added to the inflight set, so the scheduler can keep scheduling further
2502+
context chunks while earlier ones are in the PP pipeline. Only context requests that finish the context phase
2503+
are inserted into the inflight set and collected into finished_ctx_reqs.
2504+
All generation requests are still inserted into the inflight set.
2505+
"""
24982506
finished_ctx_reqs = []
24992507
for req in scheduled_requests.context_requests:
25002508
if req.is_last_context_chunk:
@@ -2511,7 +2519,11 @@ def _add_inflight_ids(self, scheduled_requests):
25112519
return finished_ctx_reqs
25122520

25132521
def _remove_inflight_ids(self, batch_state: BatchStatePP):
2514-
"""Remove reqids of current requests from self.inflight_req_ids."""
2522+
"""Remove request IDs of current requests from self.inflight_req_ids.
2523+
2524+
Context request IDs are erased from the inflight set using batch_state.finished_ctx_reqs.
2525+
Generation request IDs are erased using batch_state.sample_state.scheduled_requests.generation_requests.
2526+
"""
25152527
for req in batch_state.finished_ctx_reqs:
25162528
logger.debug(
25172529
f"Context request with ID {req.request_id} removed from DECODER model inflight set"

0 commit comments

Comments
 (0)