# limitations under the License.

import os
+import subprocess
+import sys
+import threading
from pathlib import Path
+from subprocess import PIPE, Popen

import pytest
from defs.common import venv_check_call
from defs.conftest import llm_models_root, unittest_path

+from tensorrt_llm.executor.utils import LlmLauncherEnvs
+
def test_llmapi_chat_example(llm_root, llm_venv):
    # Test for the examples/apps/chat.py
@@ -40,16 +46,8 @@ def test_llmapi_server_example(llm_root, llm_venv):


### LLMAPI examples
-def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
-                        *args):
-    example_root = Path(llm_root) / "examples" / "llm-api"
-    engine_dir = Path(engine_dir) / "llmapi"
-    if not engine_dir.exists():
-        engine_dir.mkdir(parents=True)
-    examples_script = example_root / script_name
-
-    run_command = [str(examples_script)] + list(args)
-
+def _setup_llmapi_example_softlinks(llm_venv):
+    """Create softlinks for LLM models to avoid re-downloading them for the llm-api examples."""
    # Create llm models softlink to avoid duplicated downloading for llm api example
    src_dst_dict = {
        # TinyLlama-1.1B-Chat-v1.0
@@ -87,9 +85,98 @@ def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
        cnn_dailymail_dst,
        target_is_directory=True)

+
+def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
+                        *args):
+    example_root = Path(llm_root) / "examples" / "llm-api"
+    engine_dir = Path(engine_dir) / "llmapi"
+    if not engine_dir.exists():
+        engine_dir.mkdir(parents=True)
+    examples_script = example_root / script_name
+
+    run_command = [str(examples_script)] + list(args)
+
+    _setup_llmapi_example_softlinks(llm_venv)
+
    venv_check_call(llm_venv, run_command)


+def _mpirun_llmapi_example(llm_root,
+                           llm_venv,
+                           script_name: str,
+                           tp_size: int,
+                           spawn_extra_main_process: bool = True,
+                           *args):
+    """Run an llm-api example script with mpirun.
+
+    Args:
+        llm_root: Root directory of the LLM project.
+        llm_venv: Virtual environment object.
+        script_name: Name of the example script to run.
+        tp_size: Tensor parallelism size (number of MPI processes).
+        spawn_extra_main_process: Whether to spawn an extra main process (default: True).
+        *args: Additional arguments passed through to the example script.
+    """
+    example_root = Path(llm_root) / "examples" / "llm-api"
+    examples_script = example_root / script_name
+
+    # set_spawn_extra_main_process() records the setting in os.environ, so
+    # re-read the value from there into the copied env handed to Popen.
+    env_vars = os.environ.copy()
+    LlmLauncherEnvs.set_spawn_extra_main_process(spawn_extra_main_process)
+    env_vars[LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS] = os.environ[
+        LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS]
+    run_command = [
+        "mpirun", "-n",
+        str(tp_size), "--oversubscribe", "--allow-run-as-root"
+    ]
+    # Forward the launcher env var to every MPI rank via mpirun's -x flag.
+    key = LlmLauncherEnvs.TLLM_SPAWN_EXTRA_MAIN_PROCESS
+    run_command.extend(["-x", f"{key}={env_vars[key]}"])
+    run_command.extend(["python", str(examples_script)] + list(args))
+    _setup_llmapi_example_softlinks(llm_venv)
+
+    print(' '.join(run_command))
+
+    with Popen(run_command,
+               env=env_vars,
+               stdout=PIPE,
+               stderr=PIPE,
+               bufsize=1,  # line-buffered (text mode)
+               start_new_session=True,  # run mpirun in its own session
+               universal_newlines=True,
+               cwd=llm_venv.get_working_directory()) as process:
+
+        # Copy a child stream line-by-line to the parent's stdout/stderr.
+        def read_stream(stream, output_stream):
+            for line in stream:
+                output_stream.write(line)
+                output_stream.flush()
+
+        # Read stdout and stderr concurrently.
+        stdout_thread = threading.Thread(target=read_stream,
+                                         args=(process.stdout, sys.stdout))
+        stderr_thread = threading.Thread(target=read_stream,
+                                         args=(process.stderr, sys.stderr))
+
+        stdout_thread.start()
+        stderr_thread.start()
+
+        # Wait for the process to complete.
+        return_code = process.wait()
+
+        # Wait for both threads to finish reading.
+        stdout_thread.join()
+        stderr_thread.join()
+
+    if return_code != 0:
+        raise subprocess.CalledProcessError(return_code, run_command)
+
+
def test_llmapi_quickstart(llm_root, engine_dir, llm_venv):
    _run_llmapi_example(llm_root, engine_dir, llm_venv, "quickstart_example.py")

@@ -133,6 +220,19 @@ def test_llmapi_example_distributed_tp2(llm_root, engine_dir, llm_venv):
133220 "llm_inference_distributed.py" )
134221
135222
+@pytest.mark.skip_less_device(2)
+@pytest.mark.parametrize(
+    "spawn_extra_main_process", [True, False],
+    ids=["spawn_extra_main_process", "no_spawn_extra_main_process"])
+def test_llmapi_example_launch_distributed_tp2(llm_root, llm_venv,
+                                               spawn_extra_main_process: bool):
+    _mpirun_llmapi_example(llm_root,
+                           llm_venv,
+                           "llm_inference_distributed.py",
+                           tp_size=2,
+                           spawn_extra_main_process=spawn_extra_main_process)
+
+
def test_llmapi_example_logits_processor(llm_root, engine_dir, llm_venv):
    _run_llmapi_example(llm_root, engine_dir, llm_venv,
                        "llm_logits_processor.py")