# limitations under the License.

import os
+import subprocess
+import sys
+import threading
from pathlib import Path
+from subprocess import PIPE, Popen

import pytest
from defs.common import venv_check_call
from defs.conftest import llm_models_root, unittest_path

+from tensorrt_llm.executor.utils import LlmLauncherEnvs
+
def test_llmapi_chat_example(llm_root, llm_venv):
    # Test for the examples/apps/chat.py
@@ -40,16 +46,8 @@ def test_llmapi_server_example(llm_root, llm_venv):


### LLMAPI examples
-def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
-                        *args):
-    example_root = Path(llm_root) / "examples" / "llm-api"
-    engine_dir = Path(engine_dir) / "llmapi"
-    if not engine_dir.exists():
-        engine_dir.mkdir(parents=True)
-    examples_script = example_root / script_name
-
-    run_command = [str(examples_script)] + list(args)
-
+def _setup_llmapi_example_softlinks(llm_venv):
+    """Create softlinks for LLM models to avoid re-downloading them for the llm-api examples."""
    # Create llm models softlink to avoid duplicated downloading for llm api example
    src_dst_dict = {
        # TinyLlama-1.1B-Chat-v1.0
@@ -87,9 +85,98 @@ def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
        cnn_dailymail_dst,
        target_is_directory=True)

+
+def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
+                        *args):
+    example_root = Path(llm_root) / "examples" / "llm-api"
+    engine_dir = Path(engine_dir) / "llmapi"
+    if not engine_dir.exists():
+        engine_dir.mkdir(parents=True)
+    examples_script = example_root / script_name
+
+    run_command = [str(examples_script)] + list(args)
+
+    _setup_llmapi_example_softlinks(llm_venv)
+
    venv_check_call(llm_venv, run_command)


+def _mpirun_llmapi_example(llm_root,
+                           llm_venv,
+                           script_name: str,
+                           tp_size: int,
+                           spawn_extra_main_process: bool = True,
+                           *args):
+    """Run an llm-api example script with mpirun.
+
+    Args:
+        llm_root: Root directory of the LLM project.
+        llm_venv: Virtual environment object.
+        script_name: Name of the example script to run.
+        tp_size: Tensor parallelism size (number of MPI processes).
+        spawn_extra_main_process: Whether to spawn an extra main process (default: True).
+        *args: Additional arguments passed through to the example script.
+    """
+    example_root = Path(llm_root) / "examples" / "llm-api"
+    examples_script = example_root / script_name
+
+    # set_spawn_extra_main_process() records the setting in os.environ, so
+    # re-read the value from there into the copied env handed to Popen.
+    env_vars = os.environ.copy()
+    LlmLauncherEnvs.set_spawn_extra_main_process(spawn_extra_main_process)
+    env_vars[LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS] = os.environ[
+        LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS]
+    run_command = [
+        "mpirun", "-n",
+        str(tp_size), "--oversubscribe", "--allow-run-as-root"
+    ]
+    # Forward the launcher env var to every MPI rank via mpirun's -x flag.
+    key = LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS
+    run_command.extend(["-x", f"{key}={env_vars[key]}"])
+    run_command.extend(["python", str(examples_script)] + list(args))
+    _setup_llmapi_example_softlinks(llm_venv)
+
+    print(' '.join(run_command))
+
+    with Popen(run_command,
+               env=env_vars,
+               stdout=PIPE,
+               stderr=PIPE,
+               bufsize=1,  # line-buffered (text mode)
+               start_new_session=True,  # run mpirun in its own session
+               universal_newlines=True,
+               cwd=llm_venv.get_working_directory()) as process:
+
+        # Copy a child stream line-by-line to the parent's stdout/stderr.
+        def read_stream(stream, output_stream):
+            for line in stream:
+                output_stream.write(line)
+                output_stream.flush()
+
+        # Read stdout and stderr concurrently.
+        stdout_thread = threading.Thread(target=read_stream,
+                                         args=(process.stdout, sys.stdout))
+        stderr_thread = threading.Thread(target=read_stream,
+                                         args=(process.stderr, sys.stderr))
+
+        stdout_thread.start()
+        stderr_thread.start()
+
+        # Wait for the process to complete.
+        return_code = process.wait()
+
+        # Wait for both threads to finish reading.
+        stdout_thread.join()
+        stderr_thread.join()
+
+    if return_code != 0:
+        raise subprocess.CalledProcessError(return_code, run_command)
+
+
def test_llmapi_quickstart(llm_root, engine_dir, llm_venv):
    _run_llmapi_example(llm_root, engine_dir, llm_venv, "quickstart_example.py")

@@ -133,6 +220,19 @@ def test_llmapi_example_distributed_tp2(llm_root, engine_dir, llm_venv):
133220 "llm_inference_distributed.py" )
134221
135222
+@pytest.mark.skip_less_device(2)
+@pytest.mark.parametrize(
+    "spawn_extra_main_process", [True, False],
+    ids=["spawn_extra_main_process", "no_spawn_extra_main_process"])
+def test_llmapi_example_launch_distributed_tp2(llm_root, llm_venv,
+                                               spawn_extra_main_process: bool):
+    _mpirun_llmapi_example(llm_root,
+                           llm_venv,
+                           "llm_inference_distributed.py",
+                           tp_size=2,
+                           spawn_extra_main_process=spawn_extra_main_process)
+
+
def test_llmapi_example_logits_processor(llm_root, engine_dir, llm_venv):
    _run_llmapi_example(llm_root, engine_dir, llm_venv,
                        "llm_logits_processor.py")