14 changes: 13 additions & 1 deletion tests/fault_tolerance/deploy/README.md
@@ -119,6 +119,17 @@ The following failure types are defined in `scenarios.py`:
| `sglang_prefill_scheduler` | Terminate SGLang prefill scheduler process. | `SIGKILL` to `sglang::scheduler`| sglang only |
| `sglang_prefill_detokenizer` | Terminate SGLang prefill detokenizer process. | `SIGKILL` to `sglang::detokenizer`| sglang only |

#### Token Overflow Tests

In addition to process and pod failures, the suite includes tests for **token overflow**, where the model receives an input prompt larger than its configured `max_seq_len`. These tests verify that the system gracefully rejects invalid requests without crashing.

- **Failure Injection**: Unlike the other failure types, this one is injected from the **client side**. The `aiperf` client is configured to send a batch of requests with oversized token lengths.
- **Two-Phase Execution**: These tests run in two distinct phases, creating separate log directories for each:
1. **`overflow` Phase**: Sends oversized requests. The expected outcome is a high rate of failed requests (rejections) as the server correctly identifies and blocks them.
2. **`recovery` Phase**: Immediately after the overflow phase, sends valid, normal-sized requests. The expected outcome is a high success rate, confirming that the system has recovered and remains operational.

The combined results of these two phases demonstrate both the system's ability to reject invalid inputs and its stability after handling them.
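
To make the two phases concrete, here is a minimal sketch of the kind of check the suite performs, assuming an OpenAI-compatible completions endpoint. The URL, model name, prompt sizes, request counts, and the 90% thresholds are illustrative; the real tests drive this through the `aiperf` client rather than raw HTTP calls.

```python
# Sketch only: two-phase overflow/recovery check against an assumed
# OpenAI-compatible endpoint. Values below are placeholders.
import requests

BASE_URL = "http://localhost:8000/v1/completions"  # assumed serving endpoint
MODEL = "example-model"                             # assumed model name


def send_prompt(num_words: int) -> bool:
    """Send a prompt of roughly num_words tokens; return True on HTTP success."""
    prompt = "word " * num_words
    resp = requests.post(
        BASE_URL,
        json={"model": MODEL, "prompt": prompt, "max_tokens": 16},
        timeout=60,
    )
    return resp.ok


# Phase 1 (overflow): oversized prompts should be rejected, not crash the server.
overflow_results = [send_prompt(100_000) for _ in range(8)]  # assumed > max_seq_len
failure_rate = 100.0 * overflow_results.count(False) / len(overflow_results)
assert failure_rate >= 90.0, "oversized requests should be rejected"

# Phase 2 (recovery): normal prompts should succeed, showing the system recovered.
recovery_results = [send_prompt(32) for _ in range(8)]
success_rate = 100.0 * recovery_results.count(True) / len(recovery_results)
assert success_rate >= 90.0, "valid requests should succeed after the overflow phase"
```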

#### Example Scenario Breakdown

**Scenario**: `sglang-agg-tp-2-dp-1-decode_worker`
@@ -392,7 +403,6 @@ graph LR
style DecodePool stroke:#000,stroke-width:2px
```


#### Summary:


@@ -596,3 +606,5 @@ Test Group: vllm-agg-tp-1-dp-2
╘═══════════════════╧═══════════╧═══════════╧══════════╧═══════════╧══════════╧═══════════╧═══════════╧════════════╛

```


29 changes: 16 additions & 13 deletions tests/fault_tolerance/deploy/legacy_parse_results.py
@@ -418,14 +418,15 @@ def process_test_directory(test_dir, sla):
}


def main(logs_dir, tablefmt, log_paths=None, sla=None):
def main(logs_dir, tablefmt, log_paths=None, sla=None, print_output=True):
"""Main entry point for parsing legacy client results.

Args:
logs_dir: Base directory containing test results
tablefmt: Table format for output (e.g., "fancy_grid")
log_paths: Optional list of specific log paths to process
sla: Optional SLA threshold for latency violations
print_output: If True, print tables and summaries. If False, only return results.
"""
results = []

@@ -542,19 +543,21 @@ def main(logs_dir, tablefmt, log_paths=None, sla=None):
]
rows.append(row)

print(f"\nTest Group: {test_prefix}")
print(
tabulate(
rows,
headers,
tablefmt=tablefmt,
floatfmt=".2f",
missingval="N/A",
numalign="right",
stralign="center",
if print_output:
logging.info(f"\nTest Group: {test_prefix}")
logging.info(
"\n"
+ tabulate(
rows,
headers,
tablefmt=tablefmt,
floatfmt=".2f",
missingval="N/A",
numalign="right",
stralign="center",
)
)
)
print("\n" + "=" * 80)
logging.info("\n" + "=" * 80)


if __name__ == "__main__":
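For reference, a hypothetical call into the updated legacy parser showing the new `print_output` flag. The import path follows the file location, and the logs directory, table format, and SLA value are placeholders.

```python
# Sketch only: collect results quietly for programmatic post-processing.
from tests.fault_tolerance.deploy.legacy_parse_results import main

results = main(
    logs_dir="logs/vllm-agg-tp-1-dp-2",  # assumed results directory
    tablefmt="fancy_grid",
    sla=2.0,              # optional latency SLA threshold (illustrative)
    print_output=False,   # new flag from this change: skip table/summary output
)
```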
28 changes: 19 additions & 9 deletions tests/fault_tolerance/deploy/parse_factory.py
@@ -103,7 +103,9 @@ def parse_test_results(
log_paths: Optional[List[str]] = None,
tablefmt: str = "grid",
sla: Optional[float] = None,
success_threshold: float = 90.0,
force_parser: Optional[str] = None,
print_output: bool = True,
) -> Any:
"""Auto-detect and parse test results using the appropriate parser.

@@ -116,8 +118,10 @@
log_paths: List of log directories to process (for multiple directories)
tablefmt: Table format for output (e.g., "fancy_grid", "pipe")
sla: Optional SLA threshold for latency violations
success_threshold: Success rate threshold for pass/fail (default: 90.0)
force_parser: Optional override to force using a specific parser
("aiperf" or "legacy"). If not provided, auto-detection is used.
print_output: If True, print tables and summaries. If False, only return results.

Returns:
Results from the appropriate parser
@@ -189,13 +193,17 @@ def parse_test_results(
log_paths=log_paths,
tablefmt=tablefmt,
sla=sla,
success_threshold=success_threshold,
print_output=print_output,
)
else:
return parse_aiperf(
logs_dir=log_dir,
log_paths=None,
tablefmt=tablefmt,
sla=sla,
success_threshold=success_threshold,
print_output=print_output,
)

elif parser_type == "legacy":
@@ -209,13 +217,15 @@
log_paths=log_paths,
tablefmt=tablefmt,
sla=sla,
print_output=print_output,
)
else:
return parse_legacy(
logs_dir=log_dir,
log_paths=None,
tablefmt=tablefmt,
sla=sla,
print_output=print_output,
)

else:
@@ -294,18 +304,18 @@ def print_result_info(log_dir: str) -> None:
"""
info = get_result_info(log_dir)

print(f"\nTest Results Information: {log_dir}")
print("=" * 60)
print(f"Result Type: {info['type'] or 'Unknown'}")
print(f"Client Count: {info['client_count']}")
print(f"Has Test Log: {info['has_test_log']}")
logging.info(f"\nTest Results Information: {log_dir}")
logging.info("=" * 60)
logging.info(f"Result Type: {info['type'] or 'Unknown'}")
logging.info(f"Client Count: {info['client_count']}")
logging.info(f"Has Test Log: {info['has_test_log']}")

if info["details"]:
print("\nDetails:")
logging.info("\nDetails:")
for key, value in info["details"].items():
print(f" {key}: {value}")
logging.info(f" {key}: {value}")

print("=" * 60)
logging.info("=" * 60)


if __name__ == "__main__":
@@ -354,7 +364,7 @@ def print_result_info(log_dir: str) -> None:
for log_path in args.log_paths:
print_result_info(log_path)
else:
print("Error: Must provide log_dir or --log-paths")
logging.error("Must provide log_dir or --log-paths")
else:
# Parse mode
try:
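Similarly, a sketch of how the factory entry point might be called with the `success_threshold` and `print_output` parameters added here. The leading positional log-directory argument, its value, and the import path are assumptions based on the surrounding code.

```python
# Sketch only: exercise the new success_threshold / print_output keywords.
from tests.fault_tolerance.deploy.parse_factory import parse_test_results

results = parse_test_results(
    "logs/sglang-agg-tp-2-dp-1-decode_worker",  # assumed base log directory
    tablefmt="pipe",
    sla=2.0,                  # optional latency SLA threshold (illustrative)
    success_threshold=90.0,   # pass/fail cutoff on request success rate
    print_output=False,       # return parsed results without logging tables
)
```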