Skip to content

Commit c8ffd30

Browse files
lkomalikaiyux
authored and committed
feat: Replace GenAI-Perf with AIPerf
Signed-off-by: lkomali <[email protected]>
1 parent 0e746fa commit c8ffd30

File tree

7 files changed

+44
-79
lines changed

7 files changed

+44
-79
lines changed

examples/serve/genai_perf_client.sh

Lines changed: 0 additions & 16 deletions
This file was deleted.

examples/serve/genai_perf_client_for_multimodal.sh

Lines changed: 0 additions & 19 deletions
This file was deleted.

examples/serve/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
genai-perf
1+
aiperf

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ pytest-rerunfailures
3030
ruff==0.9.4
3131
lm_eval[api]==0.4.8
3232
docstring_parser
33-
genai-perf==0.0.13
33+
aiperf==0.2.0

tests/integration/defs/stress_test/stress_test.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515
"""
1616
Stress test script for inference of model using TensorRT LLM with PyTorch/TRT backend.
17-
This script is used for stress testing inference performance using trtllm-serve and genai-perf.
17+
This script is used for stress testing inference performance using trtllm-serve and aiperf.
1818
1919
The script supports three test modes:
2020
1. "stress-test": Runs performance test followed by stress test
@@ -48,9 +48,9 @@
4848
from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
4949
print_warning)
5050

51-
# Install genai-perf in requirements-dev.txt will affect triton and pytorch version mismatch
52-
# def genai_perf_install():
53-
# """Ensures genai-perf is installed without affecting the global environment"""
51+
# Install aiperf in requirements-dev.txt will affect triton and pytorch version mismatch
52+
# def aiperf_install():
53+
# """Ensures aiperf is installed without affecting the global environment"""
5454

5555
# import os
5656
# import subprocess
@@ -62,7 +62,7 @@
6262

6363
# if not os.path.exists(requirements_file):
6464
# with open(requirements_file, "w") as f:
65-
# f.write("genai-perf\n")
65+
# f.write("aiperf\n")
6666

6767
# subprocess.check_call(
6868
# [sys.executable, "-m", "pip", "install", "-r", requirements_file])
@@ -108,7 +108,7 @@ def __str__(self) -> str:
108108

109109
@property
110110
def model_name(self) -> str:
111-
"""Extract model name from model_dir for genai-perf"""
111+
"""Extract model name from model_dir for aiperf"""
112112
return os.path.basename(self.model_dir)
113113

114114

@@ -149,14 +149,14 @@ class StressTestConfig:
149149
@property
150150
def request_count_stress_test(self) -> int:
151151
"""Calculate request count for stress test"""
152-
# Cannot set exact stress time in genai-perf test, WR is set the stress_time as customized value to get request count
152+
# Cannot set exact stress time in aiperf test, WR is set the stress_time as customized value to get request count
153153
stress_request_count = self.customized_stress_request_rate * self.customized_stress_time
154154
return stress_request_count
155155

156156

157157
@dataclass(frozen=True)
158158
class PerformanceParams:
159-
"""Dataclass to store test parameters for genai-perf"""
159+
"""Dataclass to store test parameters for aiperf"""
160160
input_len_mean: int = 64 # customized for tinyllama and llama-v3-8b-instruct-hf
161161
input_len_std: int = 16
162162
output_len_mean: int = 128 # customized for tinyllama and llama-v3-8b-instruct-hf
@@ -409,7 +409,7 @@ def stress_test(config,
409409
server_config=None,
410410
stress_time=None,
411411
stress_timeout=None):
412-
"""Test LLM model performance using trtllm-serve and genai-perf.
412+
"""Test LLM model performance using trtllm-serve and aiperf.
413413
414414
This function supports multiple testing modes controlled by the --test-mode option:
415415
- "stress-test": Runs the measure capacity stage first, then the stress stage,
@@ -426,10 +426,10 @@ def stress_test(config,
426426
stress_time: Optional stress time in seconds, overrides the default in StressTestConfig
427427
stress_timeout: Optional stress timeout in seconds, overrides the default in StressTestConfig
428428
"""
429-
# Ensure genai-perf is installed
430-
# genai_perf_install()
431-
# Import genai-perf - needed after installation to make sure it's available
432-
# import genai_perf # noqa: F401
429+
# Ensure aiperf is installed
430+
# aiperf_install()
431+
# Import aiperf - needed after installation to make sure it's available
432+
# import aiperf # noqa: F401
433433

434434
# Test mode handling - determine which tests to run
435435
if test_mode == "stress-test":
@@ -754,7 +754,7 @@ def stress_test(config,
754754
os.unlink(extra_llm_options_path)
755755

756756

757-
def create_genai_perf_command(model_name,
757+
def create_aiperf_command(model_name,
758758
model_path,
759759
request_count,
760760
concurrency,
@@ -764,7 +764,7 @@ def create_genai_perf_command(model_name,
764764
output_len_std=PerformanceParams.output_len_std,
765765
warmup_request_count=10):
766766
"""
767-
Create a command list for genai-perf with standardized parameters.
767+
Create a command list for aiperf with standardized parameters.
768768
769769
Args:
770770
model_name: Name of the model
@@ -778,10 +778,10 @@ def create_genai_perf_command(model_name,
778778
warmup_request_count: Number of warmup requests
779779
780780
Returns:
781-
List of command-line arguments for genai-perf
781+
List of command-line arguments for aiperf
782782
"""
783783
return [
784-
"genai-perf",
784+
"aiperf",
785785
"profile",
786786
"-m",
787787
model_name,
@@ -809,16 +809,16 @@ def create_genai_perf_command(model_name,
809809
]
810810

811811

812-
def run_genai_perf_process(cmd,
812+
def run_aiperf_process(cmd,
813813
test_start_time,
814814
test_timeout,
815815
server_config,
816816
request_counter=None):
817817
"""
818-
Run a genai-perf process and monitor both the process and server health.
818+
Run a aiperf process and monitor both the process and server health.
819819
820820
Args:
821-
cmd: Command list to execute genai-perf
821+
cmd: Command list to execute aiperf
822822
test_start_time: Start time of the test
823823
test_timeout: Timeout for the test in seconds
824824
server_config: Server configuration object
@@ -827,7 +827,7 @@ def run_genai_perf_process(cmd,
827827
Returns:
828828
Boolean indicating whether the process completed successfully
829829
"""
830-
# Start genai-perf process with our context manager
830+
# Start aiperf process with our context manager
831831
with launch_process(cmd,
832832
start_new_session=True,
833833
filter_pattern=None,
@@ -836,16 +836,16 @@ def run_genai_perf_process(cmd,
836836
last_health_check = time.time()
837837
process_completed = False
838838

839-
# Monitor both the server and genai-perf process
839+
# Monitor both the server and aiperf process
840840
while process.poll() is None:
841841
current_time = time.time()
842842

843-
# Check if genai-perf is still running but exceeded timeout
843+
# Check if aiperf is still running but exceeded timeout
844844
elapsed_time = current_time - test_start_time
845845
if elapsed_time > test_timeout:
846846
cleanup_process_tree(process, has_session=True)
847847
raise RuntimeError(
848-
f"genai-perf test timed out after {test_timeout} seconds")
848+
f"aiperf test timed out after {test_timeout} seconds")
849849

850850
# Check server health periodically
851851
if current_time - last_health_check > server_config.health_check_timeout:
@@ -869,20 +869,20 @@ def run_genai_perf_process(cmd,
869869

870870
time.sleep(0.5)
871871

872-
# Check final status of genai-perf process
872+
# Check final status of aiperf process
873873
retcode = process.poll()
874874
if retcode is not None:
875875
if retcode != 0:
876876
cleanup_process_tree(process, has_session=True)
877877
raise RuntimeError(
878-
f"genai-perf exited with non-zero code: {retcode}")
878+
f"aiperf exited with non-zero code: {retcode}")
879879
else:
880-
print_info("genai-perf completed successfully")
880+
print_info("aiperf completed successfully")
881881
process_completed = True
882882
else:
883883
cleanup_process_tree(process, has_session=True)
884884
raise RuntimeError(
885-
"genai-perf did not complete normally, will terminate")
885+
"aiperf did not complete normally, will terminate")
886886

887887
return process_completed
888888

@@ -921,8 +921,8 @@ def measure_capacity_stage(model_name,
921921
f"Running test {test_index+1}/{total_tests}: concurrency={concurrency}, request_count={request_count}"
922922
)
923923

924-
# Prepare genai-perf command
925-
cmd = create_genai_perf_command(
924+
# Prepare aiperf command
925+
cmd = create_aiperf_command(
926926
model_name=model_name,
927927
model_path=model_path,
928928
request_count=request_count,
@@ -933,8 +933,8 @@ def measure_capacity_stage(model_name,
933933
output_len_std=performance_params.output_len_std,
934934
warmup_request_count=10)
935935

936-
# Run genai-perf process
937-
process_completed = run_genai_perf_process(
936+
# Run aiperf process
937+
process_completed = run_aiperf_process(
938938
cmd, test_start_time, performance_params.test_timeout,
939939
server_config, request_counter)
940940

@@ -1016,8 +1016,8 @@ def stress_stage(model_name,
10161016
if request_counter:
10171017
request_counter.reset()
10181018

1019-
# Prepare genai-perf command
1020-
cmd = create_genai_perf_command(
1019+
# Prepare aiperf command
1020+
cmd = create_aiperf_command(
10211021
model_name=model_name,
10221022
model_path=model_path,
10231023
request_count=request_count,
@@ -1028,8 +1028,8 @@ def stress_stage(model_name,
10281028
output_len_std=PerformanceParams.output_len_std,
10291029
warmup_request_count=10)
10301030

1031-
# Start genai-perf process
1032-
process_completed = run_genai_perf_process(cmd, test_start_time,
1031+
# Start aiperf process
1032+
process_completed = run_aiperf_process(cmd, test_start_time,
10331033
test_timeout, server_config,
10341034
request_counter)
10351035

@@ -1183,14 +1183,14 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
11831183
artifacts_dir (str): Path to the artifacts directory
11841184
current_model (str, optional): If provided, only analyze artifacts for this model
11851185
"""
1186-
# Find all profile_export_genai_perf.json files in the artifacts directory
1186+
# Find all profile_export_aiperf.json files in the artifacts directory
11871187
json_files = glob(os.path.join(artifacts_dir,
1188-
"**/profile_export_genai_perf.json"),
1188+
"**/profile_export_aiperf.json"),
11891189
recursive=True)
11901190

11911191
if not json_files:
11921192
raise RuntimeError(
1193-
"No profile_export_genai_perf.json files found in the artifacts directory"
1193+
"No profile_export_aiperf.json files found in the artifacts directory"
11941194
)
11951195

11961196
# Get a list of directory names in the artifacts directory
@@ -1308,7 +1308,7 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
13081308
range_val = max_val - min_val
13091309
if range_val == 0:
13101310
raise ValueError(
1311-
"Please check OutputTokenThroughput from genai-perf")
1311+
"Please check OutputTokenThroughput from aiperf")
13121312
else:
13131313
normalized_df.loc[
13141314
normalized_df["Model"] == model_name,

tests/unittest/llmapi/apps/_test_trtllm_serve_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def example_root():
5454
("python3", "openai_completion_client_json_schema.py"),
5555
("bash", "curl_chat_client.sh"),
5656
("bash", "curl_completion_client.sh"),
57-
("bash", "genai_perf_client.sh")])
57+
("bash", "aiperf_client.sh")])
5858
def test_trtllm_serve_examples(exe: str, script: str,
5959
server: RemoteOpenAIServer, example_root: str):
6060
client_script = os.path.join(example_root, script)

tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def example_root():
6060

6161
@pytest.mark.parametrize("exe, script",
6262
[("python3", "openai_chat_client_for_multimodal.py"),
63-
("bash", "genai_perf_client_for_multimodal.sh")])
63+
("bash", "aiperf_client_for_multimodal.sh")])
6464
def test_trtllm_serve_examples(exe: str, script: str,
6565
server: RemoteOpenAIServer, example_root: str):
6666
client_script = os.path.join(example_root, script)

0 commit comments

Comments
 (0)