Skip to content

Commit bbaab9f

Browse files
authored
test: Add KV transfer cancellation test on TRT-LLM (#4547)
Signed-off-by: Jacky <[email protected]>
1 parent 26eb14c commit bbaab9f

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,3 +364,94 @@ def test_request_cancellation_trtllm_prefill_cancel(
364364
logger.info(
365365
"Completion request cancellation during prefill phase detected successfully"
366366
)
367+
368+
369+
@pytest.mark.trtllm_marker
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_request_cancellation_trtllm_kv_transfer_cancel(
    request, runtime_services, predownload_models
):
    """
    Cancel a completion request right after the decode worker logs it.

    Flow exercised by this test:

    1. Bring up a frontend, a prefill worker and a decode worker.
    2. Send a long-prompt completion request and read its request ID from
       the prefill worker log.
    3. Wait until the decode worker logs the same request ID, then cancel
       the request from the client side.
    4. Expect an "Aborted Request ID" entry for that ID in the decode worker
       log and a Kill control message entry in the frontend log.
    5. Send a streaming chat completion and read responses from it to show
       the workers still serve traffic.

    ``runtime_services`` and ``predownload_models`` are not referenced in the
    body; presumably they are pytest fixtures requested only for their setup
    side effects (verify against the conftest).
    """

    # Step 1: bring up the frontend
    with DynamoFrontendProcess(request) as frontend_proc:
        logger.info("Frontend started successfully")

        # Step 2: bring up the prefill worker
        with DynamoWorkerProcess(request, mode="prefill") as prefill_proc:
            logger.info(f"Prefill Worker PID: {prefill_proc.get_pid()}")

            # Step 3: bring up the decode worker
            with DynamoWorkerProcess(request, mode="decode") as decode_proc:
                logger.info(f"Decode Worker PID: {decode_proc.get_pid()}")

                # TODO: Why wait after worker ready fixes frontend 404 / 500 flakiness?
                time.sleep(2)

                # Step 4: cancel a request while it is in the KV transfer phase
                logger.info(
                    "Testing completion request cancellation during KV transfer phase..."
                )

                # A long prompt is requested for the request that will be cancelled.
                inflight = send_cancellable_request(
                    "completion", use_long_prompt=True
                )

                # The prefill worker log is where the request ID is read from.
                req_id, _ = poll_for_pattern(
                    process=prefill_proc,
                    pattern="Prefill Request ID: ",
                    match_type="contains",
                )

                # The decode worker logging this ID is treated as the start of
                # the KV transfer phase. The 2 ms poll interval is presumably
                # chosen so the cancel lands as early as possible.
                decode_entry_pattern = f"Decode Request ID: {req_id}"
                _, decode_offset = poll_for_pattern(
                    process=decode_proc,
                    pattern=decode_entry_pattern,
                    poll_interval_ms=2,
                )

                # Client-side cancellation while the decode worker holds the request.
                inflight.cancel()
                logger.info(
                    f"Cancelled request ID: {req_id} at beginning of decode"
                )

                # The decode worker must log the abort for this ID; the search
                # resumes from the offset returned by the previous decode poll.
                abort_pattern = f"Aborted Request ID: {req_id}"
                _, decode_offset = poll_for_pattern(
                    process=decode_proc,
                    pattern=abort_pattern,
                    log_offset=decode_offset,
                )

                # The frontend log must contain the kill control message.
                poll_for_pattern(
                    process=frontend_proc,
                    pattern="issued control message Kill to sender",
                )

                logger.info(
                    "Completion request cancellation at beginning of decode detected successfully"
                )

                # Sanity check: a new streaming request still reaches the
                # decode worker and streaming responses can be read from it.
                inflight = send_cancellable_request("chat_completion_stream")
                _, decode_offset = poll_for_pattern(
                    process=decode_proc,
                    pattern="Decode Request ID: ",
                    log_offset=decode_offset,
                    match_type="contains",
                )
                read_streaming_responses(inflight, expected_count=5)

                logger.info(
                    "Workers are functional after cancellation during KV transfer"
                )

0 commit comments

Comments
 (0)