@@ -233,6 +233,8 @@ def __init__(self,
233233 | None ] = [None ] * self .num_micro_batches
234234 self .send_handles = [None ] * self .num_micro_batches
235235
236+ # Set of request IDs that are currently in flight across all micro batches.
237+ # The scheduler will avoid scheduling requests that are already in flight.
236238 self .inflight_req_ids = ReqIdsSet ()
237239
238240 # During warmup, we don't enable the profiler
@@ -2494,7 +2496,13 @@ def _pause_requests(self, requests_to_pause):
24942496 self ._terminate_request (req )
24952497
24962498 def _add_inflight_ids (self , scheduled_requests ):
2497- """Add reqids of current requests to self.inflight_req_ids."""
2499+ """Add request IDs of current requests to self.inflight_req_ids.
2500+
2501+ Non-final context chunks are not added to the inflight set, so the scheduler can keep scheduling further
2502+ context chunks while earlier ones are in the PP pipeline. Only context requests that finish the context phase
2503+ are inserted into the inflight set and collected into finished_ctx_reqs.
2504+ All generation requests are still inserted into the inflight set.
2505+ """
24982506 finished_ctx_reqs = []
24992507 for req in scheduled_requests .context_requests :
25002508 if req .is_last_context_chunk :
@@ -2511,7 +2519,11 @@ def _add_inflight_ids(self, scheduled_requests):
25112519 return finished_ctx_reqs
25122520
25132521 def _remove_inflight_ids (self , batch_state : BatchStatePP ):
2514- """Remove reqids of current requests from self.inflight_req_ids."""
2522+ """Remove request IDs of current requests from self.inflight_req_ids.
2523+
2524+ Context IDs are erased from the inflight set using batch_state.finished_ctx_reqs.
2525+ Generation IDs are erased using batch_state.sample_state.scheduled_requests.generation_requests.
2526+ """
25152527 for req in batch_state .finished_ctx_reqs :
25162528 logger .debug (
25172529 f"Context request with ID { req .request_id } removed from DECODER model inflight set"
0 commit comments