@@ -364,3 +364,94 @@ def test_request_cancellation_trtllm_prefill_cancel(
364364 logger .info (
365365 "Completion request cancellation during prefill phase detected successfully"
366366 )
367+
368+
@pytest.mark.trtllm_marker
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_request_cancellation_trtllm_kv_transfer_cancel(
    request, runtime_services, predownload_models
):
    """
    End-to-end test for request cancellation during prefill to decode KV transfer phase.

    This test verifies that when a request is cancelled by the client during the KV transfer phase,
    the system properly handles the cancellation and cleans up resources on the workers.

    Flow:
        1. Start the frontend, a prefill worker and a decode worker.
        2. Send a completion request with a long prompt.
        3. Read the request ID from the prefill worker log, then wait for the
           decode worker to log the same ID.
        4. Cancel the request and wait for the decode worker to log the abort
           and for the frontend to log the kill control message.
        5. Send a streaming chat completion and read five responses to confirm
           the workers still serve traffic.

    Args:
        request: Handed to the frontend and worker process wrappers.
        runtime_services: Not referenced in the body; presumably requested for
            its setup side effects -- confirm against the fixture definition.
        predownload_models: Not referenced in the body; presumably requested
            for its setup side effects -- confirm against the fixture definition.
    """
    # NOTE(review): the f-string literals below end with a space after the
    # closing brace, exactly as in the reviewed text. Confirm that this is
    # intended for the match patterns and is not a formatting artefact.

    # Step 1: Start the frontend
    with DynamoFrontendProcess(request) as frontend:
        logger.info("Frontend started successfully")

        # Step 2: Start the prefill worker
        with DynamoWorkerProcess(request, mode="prefill") as prefill_worker:
            logger.info(f"Prefill Worker PID: {prefill_worker.get_pid()} ")

            # Step 3: Start the decode worker
            with DynamoWorkerProcess(request, mode="decode") as decode_worker:
                logger.info(f"Decode Worker PID: {decode_worker.get_pid()} ")

                # TODO: Why wait after worker ready fixes frontend 404 / 500 flakiness?
                time.sleep(2)

                # Step 4: Test request cancellation during KV transfer phase
                logger.info(
                    "Testing completion request cancellation during KV transfer phase..."
                )

                # Send request with long prompt
                cancellable_req = send_cancellable_request(
                    "completion", use_long_prompt=True
                )

                # Poll for "Prefill Request ID" pattern in prefill worker.
                # Only the matched request ID is needed; the prefill log
                # offset is never consulted again, so it is discarded.
                request_id, _ = poll_for_pattern(
                    process=prefill_worker,
                    pattern="Prefill Request ID: ",
                    match_type="contains",
                )

                # Poll for decode worker entry signaling start of KV transfer phase.
                # A short poll interval is used so the cancel below is issued
                # as soon as possible after the decode worker logs the request.
                _, decode_log_offset = poll_for_pattern(
                    process=decode_worker,
                    pattern=f"Decode Request ID: {request_id} ",
                    poll_interval_ms=2,
                )

                # Cancel during KV transfer phase in decode worker
                cancellable_req.cancel()
                logger.info(
                    f"Cancelled request ID: {request_id} at beginning of decode"
                )

                # Poll for "Aborted Request ID" in decode worker, resuming
                # from where the previous decode-log poll stopped.
                _, decode_log_offset = poll_for_pattern(
                    process=decode_worker,
                    pattern=f"Aborted Request ID: {request_id} ",
                    log_offset=decode_log_offset,
                )

                # Verify frontend log has kill message (result not needed)
                poll_for_pattern(
                    process=frontend,
                    pattern="issued control message Kill to sender",
                )

                logger.info(
                    "Completion request cancellation at beginning of decode detected successfully"
                )

                # Verify the workers are still functional
                cancellable_req = send_cancellable_request("chat_completion_stream")
                # Wait for the new request to reach the decode worker; the
                # returned values are not used after this point.
                poll_for_pattern(
                    process=decode_worker,
                    pattern="Decode Request ID: ",
                    log_offset=decode_log_offset,
                    match_type="contains",
                )
                read_streaming_responses(cancellable_req, expected_count=5)

                logger.info(
                    "Workers are functional after cancellation during KV transfer"
                )
0 commit comments