Skip to content

Commit 6ac1065

Browse files
committed
test: Add KV transfer cancellation test on TRT-LLM
Signed-off-by: Jacky <[email protected]>
1 parent f0ca16f commit 6ac1065

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,95 @@ def test_request_cancellation_trtllm_prefill_cancel(
362362
logger.info(
363363
"Completion request cancellation during prefill phase detected successfully"
364364
)
365+
366+
367+
@pytest.mark.trtllm_marker
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_request_cancellation_trtllm_kv_transfer_cancel(
    request, runtime_services, predownload_models
):
    """
    End-to-end test for request cancellation during the prefill-to-decode KV transfer phase.

    The test starts a frontend, a prefill worker and a decode worker, sends a
    long-prompt completion request, and cancels it from the client as soon as
    the prefill worker logs that it has started sending the KV cache. It then
    checks that:

    * the decode worker logs "Aborted Request ID: <id>" for the cancelled request,
    * the frontend logs "issued control message Kill to sender",
    * a follow-up streaming chat completion request still reaches the decode
      worker and streams responses back.

    The ``runtime_services`` and ``predownload_models`` fixtures are not
    referenced in the body; presumably they are requested only for their
    setup side effects (verify against conftest).
    """

    # Step 1: Start the frontend
    with DynamoFrontendProcess(request) as frontend:
        logger.info("Frontend started successfully")

        # Step 2: Start the prefill worker
        with DynamoWorkerProcess(request, mode="prefill") as prefill_worker:
            logger.info(f"Prefill Worker PID: {prefill_worker.get_pid()}")

            # Step 3: Start the decode worker
            with DynamoWorkerProcess(request, mode="decode") as decode_worker:
                logger.info(f"Decode Worker PID: {decode_worker.get_pid()}")

                # TODO: Investigate why waiting after the workers report ready
                # fixes the frontend 404 / 500 flakiness.
                time.sleep(2)

                # Step 4: Test request cancellation during KV transfer phase
                logger.info(
                    "Testing completion request cancellation during KV transfer phase..."
                )

                # Send request with long prompt
                cancellable_req = send_cancellable_request(
                    "completion", use_long_prompt=True
                )

                # Wait for the prefill worker to log the request. The first
                # returned value is used below as the request ID (presumably
                # the text following the pattern - verify against
                # poll_for_pattern); the second is the log offset to resume from.
                request_id, prefill_log_offset = poll_for_pattern(
                    process=prefill_worker,
                    pattern="Prefill Request ID: ",
                    match_type="contains",
                )

                # Wait for the prefill worker to start sending the KV cache.
                # Short poll interval, presumably so the cancel below is issued
                # while the transfer is still in progress.
                poll_for_pattern(
                    process=prefill_worker,
                    pattern="Start sending KV cache for request ID: ",
                    log_offset=prefill_log_offset,
                    poll_interval_ms=2,
                    match_type="contains",
                )

                # Cancel during KV transfer phase
                cancellable_req.cancel()
                logger.info(
                    f"Cancelled request ID: {request_id} during KV transfer phase"
                )

                # Poll for "Aborted Request ID" in decode worker; keep the
                # offset so the follow-up check only scans newer log output.
                _, decode_log_offset = poll_for_pattern(
                    process=decode_worker,
                    pattern=f"Aborted Request ID: {request_id}",
                )

                # Verify frontend log has kill message
                poll_for_pattern(
                    process=frontend,
                    pattern="issued control message Kill to sender",
                )

                logger.info(
                    "Completion request cancellation during KV transfer phase detected successfully"
                )

                # Verify the workers are still functional: a new streaming
                # request must show up in the decode worker log after the
                # abort and stream responses back to the client.
                followup_req = send_cancellable_request("chat_completion_stream")
                poll_for_pattern(
                    process=decode_worker,
                    pattern="Decode Request ID: ",
                    log_offset=decode_log_offset,
                    match_type="contains",
                )
                read_streaming_responses(followup_req, expected_count=5)

                logger.info(
                    "Workers are functional after cancellation during KV transfer"
                )

0 commit comments

Comments
 (0)