1414# limitations under the License.
1515
1616import os
17+ import subprocess
18+ import sys
19+ import threading
1720from pathlib import Path
21+ from subprocess import PIPE , Popen
1822
1923import pytest
2024from defs .common import venv_check_call
@@ -40,16 +44,8 @@ def test_llmapi_server_example(llm_root, llm_venv):
4044
4145
4246### LLMAPI examples
43- def _run_llmapi_example (llm_root , engine_dir , llm_venv , script_name : str ,
44- * args ):
45- example_root = Path (llm_root ) / "examples" / "llm-api"
46- engine_dir = Path (engine_dir ) / "llmapi"
47- if not engine_dir .exists ():
48- engine_dir .mkdir (parents = True )
49- examples_script = example_root / script_name
50-
51- run_command = [str (examples_script )] + list (args )
52-
47+ def _setup_llmapi_example_softlinks (llm_venv ):
48+ """Create softlinks for LLM models to avoid duplicated downloading for llm api examples"""
5349 # Create llm models softlink to avoid duplicated downloading for llm api example
5450 src_dst_dict = {
5551 # TinyLlama-1.1B-Chat-v1.0
@@ -87,9 +83,96 @@ def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
8783 cnn_dailymail_dst ,
8884 target_is_directory = True )
8985
86+
def _run_llmapi_example(llm_root, engine_dir, llm_venv, script_name: str,
                        *args):
    """Run an llm-api example script inside the test venv.

    Args:
        llm_root: Root directory of the LLM project.
        engine_dir: Base directory for engine artifacts; an ``llmapi``
            subdirectory is created under it if missing.
        llm_venv: Virtual environment object used to execute the script.
        script_name: File name of the example script under examples/llm-api.
        *args: Additional command-line arguments passed to the script.
    """
    example_root = Path(llm_root) / "examples" / "llm-api"
    engine_dir = Path(engine_dir) / "llmapi"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    engine_dir.mkdir(parents=True, exist_ok=True)
    examples_script = example_root / script_name

    run_command = [str(examples_script)] + list(args)

    _setup_llmapi_example_softlinks(llm_venv)

    venv_check_call(llm_venv, run_command)
91100
92101
def _mpirun_llmapi_example(llm_root,
                           llm_venv,
                           script_name: str,
                           tp_size: int,
                           *args,
                           spawn_extra_main_process: bool = True):
    """Run an llmapi example script with mpirun.

    Args:
        llm_root: Root directory of the LLM project
        llm_venv: Virtual environment object
        script_name: Name of the example script to run
        tp_size: Tensor parallelism size (number of MPI processes)
        *args: Additional arguments to pass to the example script
        spawn_extra_main_process: Whether to spawn extra main process
            (default: True). Keyword-only so it can never silently swallow
            the first positional script argument meant for ``*args``.

    Raises:
        subprocess.CalledProcessError: If the mpirun command exits non-zero.
    """
    example_root = Path(llm_root) / "examples" / "llm-api"
    examples_script = example_root / script_name

    # Set environment variable for spawn_extra_main_process.
    env_vars = os.environ.copy()
    env_vars['TLLM_SPAWN_EXTRA_MAIN_PROCESS'] = (
        "1" if spawn_extra_main_process else "0")

    run_command = [
        "mpirun", "-n",
        str(tp_size), "--oversubscribe", "--allow-run-as-root",
        # Explicitly forward the variable through mpirun to every rank.
        "-x",
        f"TLLM_SPAWN_EXTRA_MAIN_PROCESS="
        f"{env_vars['TLLM_SPAWN_EXTRA_MAIN_PROCESS']}",
        "python",
        str(examples_script),
    ] + list(args)

    _setup_llmapi_example_softlinks(llm_venv)

    print(' '.join(run_command))

    with Popen(run_command,
               env=env_vars,
               stdout=PIPE,
               stderr=PIPE,
               bufsize=1,
               start_new_session=True,
               universal_newlines=True,
               cwd=llm_venv.get_working_directory()) as process:

        # Drain a child stream into our own stream so output appears live
        # and the pipe buffers can never fill up and stall the child.
        def read_stream(stream, output_stream):
            for line in stream:
                output_stream.write(line)
                output_stream.flush()

        # Read stdout and stderr concurrently.
        stdout_thread = threading.Thread(target=read_stream,
                                         args=(process.stdout, sys.stdout))
        stderr_thread = threading.Thread(target=read_stream,
                                         args=(process.stderr, sys.stderr))

        stdout_thread.start()
        stderr_thread.start()

        # Wait for the process to complete, then for both readers to
        # finish flushing the remaining output.
        return_code = process.wait()
        stdout_thread.join()
        stderr_thread.join()

    if return_code != 0:
        raise subprocess.CalledProcessError(return_code, run_command)
174+
175+
def test_llmapi_quickstart(llm_root, engine_dir, llm_venv):
    """Smoke-test the llm-api quickstart example end to end."""
    script = "quickstart_example.py"
    _run_llmapi_example(llm_root, engine_dir, llm_venv, script)
95178
@@ -133,6 +216,19 @@ def test_llmapi_example_distributed_tp2(llm_root, engine_dir, llm_venv):
133216 "llm_inference_distributed.py" )
134217
135218
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize(
    "spawn_extra_main_process", [True, False],
    ids=["spawn_extra_main_process", "no_spawn_extra_main_process"])
def test_llmapi_example_launch_distributed_tp2(llm_root, llm_venv,
                                               spawn_extra_main_process: bool):
    """Launch llm_inference_distributed.py under mpirun with TP=2."""
    launch_kwargs = dict(tp_size=2,
                         spawn_extra_main_process=spawn_extra_main_process)
    _mpirun_llmapi_example(llm_root, llm_venv,
                           "llm_inference_distributed.py", **launch_kwargs)
230+
231+
def test_llmapi_example_logits_processor(llm_root, engine_dir, llm_venv):
    """Run the llm_logits_processor.py example end to end."""
    script = "llm_logits_processor.py"
    _run_llmapi_example(llm_root, engine_dir, llm_venv, script)
0 commit comments