@@ -233,6 +233,8 @@ def __init__(self,
233233 | None ] = [None ] * self .num_micro_batches
234234 self .send_handles = [None ] * self .num_micro_batches
235235
236+ # Set of request IDs that are currently in flight across all micro batches.
237+ # The scheduler will avoid scheduling requests that are already in flight.
236238 self .inflight_req_ids = ReqIdsSet ()
237239
238240 # During warmup, we don't enable the profiler
@@ -2484,7 +2486,13 @@ def _pause_requests(self, requests_to_pause):
24842486 self ._terminate_request (req )
24852487
24862488 def _add_inflight_ids (self , scheduled_requests ):
2487- """Add reqids of current requests to self.inflight_req_ids."""
2489+ """Add request IDs of current requests to self.inflight_req_ids.
2490+
2491+ Non-final context chunks are not added to the inflight set, so the scheduler can keep scheduling further
2492+ context chunks while earlier ones are in the PP pipeline. Only context requests that finish the context phase
2493+ are inserted into the inflight set and collected into finished_ctx_reqs.
2494+ All generation requests are still inserted into the inflight set.
2495+ """
24882496 finished_ctx_reqs = []
24892497 for req in scheduled_requests .context_requests :
24902498 if req .is_last_context_chunk :
@@ -2501,7 +2509,11 @@ def _add_inflight_ids(self, scheduled_requests):
25012509 return finished_ctx_reqs
25022510
25032511 def _remove_inflight_ids (self , batch_state : BatchStatePP ):
2504- """Remove reqids of current requests from self.inflight_req_ids."""
2512+ """Remove request IDs of current requests from self.inflight_req_ids.
2513+
2514+ Context IDs are erased from the inflight set using batch_state.finished_ctx_reqs.
2515+ Generation IDs are erased using batch_state.sample_state.scheduled_requests.generation_requests.
2516+ """
25052517 for req in batch_state .finished_ctx_reqs :
25062518 logger .debug (
25072519 f"Context request with ID { req .request_id } removed from DECODER model inflight set"
0 commit comments