1414# limitations under the License.
1515
1616import os
17+ import subprocess
18+ import sys
19+ import threading
1720from pathlib import Path
21+ from subprocess import PIPE , Popen
1822
1923import pytest
2024from defs .common import venv_check_call
@@ -40,16 +44,8 @@ def test_llmapi_server_example(llm_root, llm_venv):
4044
4145
4246### LLMAPI examples
43- def _run_llmapi_example (llm_root , engine_dir , llm_venv , script_name : str ,
44- * args ):
45- example_root = Path (llm_root ) / "examples" / "llm-api"
46- engine_dir = Path (engine_dir ) / "llmapi"
47- if not engine_dir .exists ():
48- engine_dir .mkdir (parents = True )
49- examples_script = example_root / script_name
50-
51- run_command = [str (examples_script )] + list (args )
52-
47+ def _setup_llmapi_example_softlinks (llm_venv ):
48+ """Create softlinks for LLM models to avoid duplicated downloading for llm api examples"""
5349 # Create llm models softlink to avoid duplicated downloading for llm api example
5450 src_dst_dict = {
5551 # TinyLlama-1.1B-Chat-v1.0
@@ -87,9 +83,96 @@ def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
8783 cnn_dailymail_dst ,
8884 target_is_directory = True )
8985
86+
def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
                        *args):
    """Run an llm-api example script inside the test venv.

    Args:
        llm_root: Root directory of the LLM project.
        engine_dir: Base directory for engine artifacts; an ``llmapi``
            subdirectory is created under it if missing.
        llm_venv: Virtual environment object used to execute the script.
        script_name: File name of the example script under examples/llm-api.
        *args: Additional command-line arguments passed to the script.
    """
    example_root = Path(llm_root) / "examples" / "llm-api"
    engine_dir = Path(engine_dir) / "llmapi"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    engine_dir.mkdir(parents=True, exist_ok=True)
    examples_script = example_root / script_name

    run_command = [str(examples_script)] + list(args)

    _setup_llmapi_example_softlinks(llm_venv)

    venv_check_call(llm_venv, run_command)
91100
92101
def _mpirun_llmapi_example(llm_root,
                           llm_venv,
                           script_name: str,
                           tp_size: int,
                           *args,
                           spawn_extra_main_process: bool = True):
    """Run an llmapi example script with mpirun.

    Args:
        llm_root: Root directory of the LLM project
        llm_venv: Virtual environment object
        script_name: Name of the example script to run
        tp_size: Tensor parallelism size (number of MPI processes)
        *args: Additional arguments to pass to the example script
        spawn_extra_main_process: Whether to spawn extra main process
            (default: True). Keyword-only so it can never silently swallow
            the first positional script argument meant for ``*args``.

    Raises:
        subprocess.CalledProcessError: If the mpirun command exits non-zero.
    """
    example_root = Path(llm_root) / "examples" / "llm-api"
    examples_script = example_root / script_name

    # Set environment variable for spawn_extra_main_process.
    env_vars = os.environ.copy()
    env_vars['TLLM_SPAWN_EXTRA_MAIN_PROCESS'] = (
        "1" if spawn_extra_main_process else "0")

    run_command = [
        "mpirun", "-n",
        str(tp_size), "--oversubscribe", "--allow-run-as-root",
        # Explicitly forward the variable through mpirun to every rank.
        "-x",
        f"TLLM_SPAWN_EXTRA_MAIN_PROCESS="
        f"{env_vars['TLLM_SPAWN_EXTRA_MAIN_PROCESS']}",
        "python",
        str(examples_script),
    ] + list(args)

    _setup_llmapi_example_softlinks(llm_venv)

    print(' '.join(run_command))

    with Popen(run_command,
               env=env_vars,
               stdout=PIPE,
               stderr=PIPE,
               bufsize=1,
               start_new_session=True,
               universal_newlines=True,
               cwd=llm_venv.get_working_directory()) as process:

        # Drain a child stream into our own stream so output appears live
        # and the pipe buffers can never fill up and stall the child.
        def read_stream(stream, output_stream):
            for line in stream:
                output_stream.write(line)
                output_stream.flush()

        # Read stdout and stderr concurrently.
        stdout_thread = threading.Thread(target=read_stream,
                                         args=(process.stdout, sys.stdout))
        stderr_thread = threading.Thread(target=read_stream,
                                         args=(process.stderr, sys.stderr))

        stdout_thread.start()
        stderr_thread.start()

        # Wait for the process to complete, then for both readers to
        # finish flushing the remaining output.
        return_code = process.wait()
        stdout_thread.join()
        stderr_thread.join()

    if return_code != 0:
        raise subprocess.CalledProcessError(return_code, run_command)
174+
175+
def test_llmapi_quickstart(llm_root, engine_dir, llm_venv):
    """Smoke-test the llm-api quickstart example end to end."""
    script = "quickstart_example.py"
    _run_llmapi_example(llm_root, engine_dir, llm_venv, script)
95178
@@ -133,6 +216,19 @@ def test_llmapi_example_distributed_tp2(llm_root, engine_dir, llm_venv):
133216 "llm_inference_distributed.py" )
134217
135218
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize(
    "spawn_extra_main_process", [True, False],
    ids=["spawn_extra_main_process", "no_spawn_extra_main_process"])
def test_llmapi_example_launch_distributed_tp2(llm_root, llm_venv,
                                               spawn_extra_main_process: bool):
    """Launch llm_inference_distributed.py under mpirun with TP=2."""
    launch_kwargs = dict(tp_size=2,
                         spawn_extra_main_process=spawn_extra_main_process)
    _mpirun_llmapi_example(llm_root, llm_venv,
                           "llm_inference_distributed.py", **launch_kwargs)
230+
231+
def test_llmapi_example_logits_processor(llm_root, engine_dir, llm_venv):
    """Run the llm_logits_processor.py example end to end."""
    script = "llm_logits_processor.py"
    _run_llmapi_example(llm_root, engine_dir, llm_venv, script)
0 commit comments