14   # limitations under the License.
15   """
16   Stress test script for inference of model using TensorRT LLM with PyTorch/TRT backend.
17 - This script is used for stress testing inference performance using trtllm-serve and genai-perf.
17 + This script is used for stress testing inference performance using trtllm-serve and aiperf.
18
19   The script supports three test modes:
20   1. "stress-test": Runs performance test followed by stress test
48   from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
49                                           print_warning)
50
51 - # Install genai-perf in requirements-dev.txt will affect triton and pytorch version mismatch
52 - # def genai_perf_install():
53 - #     """Ensures genai-perf is installed without affecting the global environment"""
51 + # Installing aiperf via requirements-dev.txt would cause a triton/pytorch version mismatch
52 + # def aiperf_install():
53 + #     """Ensures aiperf is installed without affecting the global environment"""
54
55   # import os
56   # import subprocess
62
63   # if not os.path.exists(requirements_file):
64   #     with open(requirements_file, "w") as f:
65 - #         f.write("genai-perf\n")
65 + #         f.write("aiperf\n")
66
67   # subprocess.check_call(
68   #     [sys.executable, "-m", "pip", "install", "-r", requirements_file])
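For reference, a minimal sketch of what the commented-out helper above might look like if re-enabled; the aiperf package name and the temporary requirements-file location are assumptions, not confirmed by this diff:

    import os
    import subprocess
    import sys
    import tempfile


    def aiperf_install():
        """Best-effort pip install of aiperf, mirroring the commented-out helper above."""
        # Use a throwaway requirements file so the install stays out of
        # requirements-dev.txt and does not pin triton/pytorch versions.
        requirements_file = os.path.join(tempfile.gettempdir(),
                                         "aiperf_requirements.txt")
        if not os.path.exists(requirements_file):
            with open(requirements_file, "w") as f:
                f.write("aiperf\n")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "-r", requirements_file])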
@@ -108,7 +108,7 @@ def __str__(self) -> str:
108
109       @property
110       def model_name(self) -> str:
111 -         """Extract model name from model_dir for genai-perf"""
111 +         """Extract model name from model_dir for aiperf"""
112           return os.path.basename(self.model_dir)
113
114
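For illustration, with a hypothetical model_dir (the path below is made up), the property simply returns the last path component:

    import os

    model_dir = "/scratch/models/llama-v3-8b-instruct-hf"  # hypothetical path
    print(os.path.basename(model_dir))  # -> "llama-v3-8b-instruct-hf"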
@@ -149,14 +149,14 @@ class StressTestConfig:
149       @property
150       def request_count_stress_test(self) -> int:
151           """Calculate request count for stress test"""
152 -         # Cannot set exact stress time in genai-perf test, WR is set the stress_time as customized value to get request count
152 +         # aiperf cannot target an exact stress duration; as a workaround, the request count is derived from the customized request rate and stress time
153           stress_request_count = self.customized_stress_request_rate * self.customized_stress_time
154           return stress_request_count
155
156
157   @dataclass(frozen=True)
158   class PerformanceParams:
159 -     """Dataclass to store test parameters for genai-perf"""
159 +     """Dataclass to store test parameters for aiperf"""
160       input_len_mean: int = 64  # customized for tinyllama and llama-v3-8b-instruct-hf
161       input_len_std: int = 16
162       output_len_mean: int = 128  # customized for tinyllama and llama-v3-8b-instruct-hf
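As a worked example with hypothetical values (the real defaults for the request rate and stress time live elsewhere in StressTestConfig and are not shown in this hunk):

    # Hypothetical values for illustration only.
    customized_stress_request_rate = 20   # requests per second
    customized_stress_time = 300          # seconds
    stress_request_count = customized_stress_request_rate * customized_stress_time
    print(stress_request_count)  # 6000 requests submitted during the stress stage

With the PerformanceParams defaults above, each request then uses synthetic inputs of roughly 64 +/- 16 tokens and targets about 128 output tokens.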
@@ -409,7 +409,7 @@ def stress_test(config,
409                 server_config=None,
410                 stress_time=None,
411                 stress_timeout=None):
412 -     """Test LLM model performance using trtllm-serve and genai-perf.
412 +     """Test LLM model performance using trtllm-serve and aiperf.
413
414       This function supports multiple testing modes controlled by the --test-mode option:
415       - "stress-test": Runs the measure capacity stage first, then the stress stage,
@@ -426,10 +426,10 @@ def stress_test(config,
426           stress_time: Optional stress time in seconds, overrides the default in StressTestConfig
427           stress_timeout: Optional stress timeout in seconds, overrides the default in StressTestConfig
428       """
429 -     # Ensure genai-perf is installed
430 -     # genai_perf_install()
431 -     # Import genai-perf - needed after installation to make sure it's available
432 -     # import genai_perf  # noqa: F401
429 +     # Ensure aiperf is installed
430 +     # aiperf_install()
431 +     # Import aiperf - needed after installation to make sure it's available
432 +     # import aiperf  # noqa: F401
433
434       # Test mode handling - determine which tests to run
435       if test_mode == "stress-test":
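Purely as a sketch of how the optional stress_time/stress_timeout overrides might be applied, assuming StressTestConfig is a frozen dataclass like PerformanceParams above (the default construction and the stress_timeout field name are assumptions; only customized_stress_time appears in this diff):

    from dataclasses import replace

    stress_config = StressTestConfig()  # assumed default construction
    if stress_time is not None:
        # replace() returns a copy, so the frozen dataclass is never mutated
        stress_config = replace(stress_config, customized_stress_time=stress_time)
    if stress_timeout is not None:
        stress_config = replace(stress_config, stress_timeout=stress_timeout)  # field name assumed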
@@ -754,7 +754,7 @@ def stress_test(config,
754           os.unlink(extra_llm_options_path)
755
756
757 - def create_genai_perf_command(model_name,
757 + def create_aiperf_command(model_name,
758                                 model_path,
759                                 request_count,
760                                 concurrency,
@@ -764,7 +764,7 @@ def create_genai_perf_command(model_name,
764                               output_len_std=PerformanceParams.output_len_std,
765                               warmup_request_count=10):
766       """
767 -     Create a command list for genai-perf with standardized parameters.
767 +     Create a command list for aiperf with standardized parameters.
768
769       Args:
770           model_name: Name of the model
@@ -778,10 +778,10 @@ def create_genai_perf_command(model_name,
778           warmup_request_count: Number of warmup requests
779
780       Returns:
781 -         List of command-line arguments for genai-perf
781 +         List of command-line arguments for aiperf
782       """
783       return [
784 -         "genai-perf",
784 +         "aiperf",
785           "profile",
786           "-m",
787           model_name,
@@ -809,16 +809,16 @@ def create_genai_perf_command(model_name,
809       ]
810
811
812 - def run_genai_perf_process(cmd,
812 + def run_aiperf_process(cmd,
813                              test_start_time,
814                              test_timeout,
815                              server_config,
816                              request_counter=None):
817       """
818 -     Run a genai-perf process and monitor both the process and server health.
818 +     Run an aiperf process and monitor both the process and server health.
819
820       Args:
821 -         cmd: Command list to execute genai-perf
821 +         cmd: Command list to execute aiperf
822           test_start_time: Start time of the test
823           test_timeout: Timeout for the test in seconds
824           server_config: Server configuration object
@@ -827,7 +827,7 @@ def run_genai_perf_process(cmd,
827       Returns:
828           Boolean indicating whether the process completed successfully
829       """
830 -     # Start genai-perf process with our context manager
830 +     # Start aiperf process with our context manager
831       with launch_process(cmd,
832                           start_new_session=True,
833                           filter_pattern=None,
@@ -836,16 +836,16 @@ def run_genai_perf_process(cmd,
836           last_health_check = time.time()
837           process_completed = False
838
839 -         # Monitor both the server and genai-perf process
839 +         # Monitor both the server and aiperf process
840           while process.poll() is None:
841               current_time = time.time()
842
843 -             # Check if genai-perf is still running but exceeded timeout
843 +             # Check if aiperf is still running but exceeded timeout
844               elapsed_time = current_time - test_start_time
845               if elapsed_time > test_timeout:
846                   cleanup_process_tree(process, has_session=True)
847                   raise RuntimeError(
848 -                     f"genai-perf test timed out after {test_timeout} seconds")
848 +                     f"aiperf test timed out after {test_timeout} seconds")
849
850               # Check server health periodically
851               if current_time - last_health_check > server_config.health_check_timeout:
@@ -869,20 +869,20 @@ def run_genai_perf_process(cmd,
869
870               time.sleep(0.5)
871
872 -         # Check final status of genai-perf process
872 +         # Check final status of aiperf process
873           retcode = process.poll()
874           if retcode is not None:
875               if retcode != 0:
876                   cleanup_process_tree(process, has_session=True)
877                   raise RuntimeError(
878 -                     f"genai-perf exited with non-zero code: {retcode}")
878 +                     f"aiperf exited with non-zero code: {retcode}")
879               else:
880 -                 print_info("genai-perf completed successfully")
880 +                 print_info("aiperf completed successfully")
881                   process_completed = True
882           else:
883               cleanup_process_tree(process, has_session=True)
884               raise RuntimeError(
885 -                 "genai-perf did not complete normally, will terminate")
885 +                 "aiperf did not complete normally, will terminate")
886
887       return process_completed
888
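The hunks above implement a common poll/timeout/health-check loop. A stripped-down, self-contained sketch of the same pattern (generic names, no trtllm-serve or aiperf specifics) might look like this:

    import subprocess
    import time


    def monitor_process(cmd, timeout_s, health_check, health_interval_s=30.0):
        """Poll a child process while enforcing a wall-clock timeout and periodic health checks."""
        start = time.time()
        proc = subprocess.Popen(cmd)
        last_check = start
        try:
            while proc.poll() is None:
                now = time.time()
                if now - start > timeout_s:
                    raise RuntimeError(f"benchmark timed out after {timeout_s} seconds")
                if now - last_check > health_interval_s:
                    # health_check is any callable returning True when the server responds,
                    # e.g. an HTTP GET against the server's /health endpoint.
                    if not health_check():
                        raise RuntimeError("server health check failed")
                    last_check = now
                time.sleep(0.5)
        finally:
            # Make sure the child does not outlive a timeout or health failure.
            if proc.poll() is None:
                proc.kill()
        if proc.returncode != 0:
            raise RuntimeError(f"benchmark exited with non-zero code: {proc.returncode}")
        return True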
@@ -921,8 +921,8 @@ def measure_capacity_stage(model_name,
921               f"Running test {test_index + 1}/{total_tests}: concurrency={concurrency}, request_count={request_count}"
922           )
923
924 -         # Prepare genai-perf command
925 -         cmd = create_genai_perf_command(
924 +         # Prepare aiperf command
925 +         cmd = create_aiperf_command(
926               model_name=model_name,
927               model_path=model_path,
928               request_count=request_count,
@@ -933,8 +933,8 @@ def measure_capacity_stage(model_name,
933               output_len_std=performance_params.output_len_std,
934               warmup_request_count=10)
935
936 -         # Run genai-perf process
937 -         process_completed = run_genai_perf_process(
936 +         # Run aiperf process
937 +         process_completed = run_aiperf_process(
938               cmd, test_start_time, performance_params.test_timeout,
939               server_config, request_counter)
940
@@ -1016,8 +1016,8 @@ def stress_stage(model_name,
1016       if request_counter:
1017           request_counter.reset()
1018
1019 -     # Prepare genai-perf command
1020 -     cmd = create_genai_perf_command(
1019 +     # Prepare aiperf command
1020 +     cmd = create_aiperf_command(
1021           model_name=model_name,
1022           model_path=model_path,
1023           request_count=request_count,
@@ -1028,8 +1028,8 @@ def stress_stage(model_name,
1028           output_len_std=PerformanceParams.output_len_std,
1029           warmup_request_count=10)
1030
1031 -     # Start genai-perf process
1032 -     process_completed = run_genai_perf_process(cmd, test_start_time,
1031 +     # Start aiperf process
1032 +     process_completed = run_aiperf_process(cmd, test_start_time,
1033                                                  test_timeout, server_config,
1034                                                  request_counter)
1035
@@ -1183,14 +1183,14 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
1183           artifacts_dir (str): Path to the artifacts directory
1184           current_model (str, optional): If provided, only analyze artifacts for this model
1185       """
1186 -     # Find all profile_export_genai_perf.json files in the artifacts directory
1186 +     # Find all profile_export_aiperf.json files in the artifacts directory
1187       json_files = glob(os.path.join(artifacts_dir,
1188 -                                    "**/profile_export_genai_perf.json"),
1188 +                                    "**/profile_export_aiperf.json"),
1189                         recursive=True)
1190
1191       if not json_files:
1192           raise RuntimeError(
1193 -             "No profile_export_genai_perf.json files found in the artifacts directory"
1193 +             "No profile_export_aiperf.json files found in the artifacts directory"
1194           )
1195
1196       # Get a list of directory names in the artifacts directory
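A hedged sketch of the metric-collection step that follows this glob; the JSON key names below are hypothetical, since only the profile_export_aiperf.json filename appears in the diff:

    import json
    import os
    from glob import glob

    json_files = glob(os.path.join("./artifacts", "**/profile_export_aiperf.json"),
                      recursive=True)
    records = []
    for path in json_files:
        with open(path) as f:
            data = json.load(f)
        # "output_token_throughput" is an assumed key, shown only to illustrate the
        # shape of the aggregation; the real script maps JSON fields into a dataframe.
        records.append({
            "artifact_dir": os.path.basename(os.path.dirname(path)),
            "throughput": data.get("output_token_throughput", {}).get("avg"),
        })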
@@ -1308,7 +1308,7 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
1308           range_val = max_val - min_val
1309           if range_val == 0:
1310               raise ValueError(
1311 -                 "Please check OutputTokenThroughput from genai-perf")
1311 +                 "Please check OutputTokenThroughput from aiperf")
1312           else:
1313               normalized_df.loc[
1314                   normalized_df["Model"] == model_name,
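The guarded normalization above is standard min-max scaling. A minimal, self-contained sketch of the same idea with pandas (column values and model name are illustrative, not taken from real runs):

    import pandas as pd

    df = pd.DataFrame({
        "Model": ["model-a"] * 3,
        "OutputTokenThroughput": [100.0, 150.0, 200.0],  # illustrative values
    })
    min_val = df["OutputTokenThroughput"].min()
    max_val = df["OutputTokenThroughput"].max()
    range_val = max_val - min_val
    if range_val == 0:
        raise ValueError("Please check OutputTokenThroughput from aiperf")
    # Scale the column into [0, 1] for the rows belonging to this model.
    df.loc[df["Model"] == "model-a", "NormalizedThroughput"] = (
        df["OutputTokenThroughput"] - min_val) / range_val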