@@ -1,6 +1,6 @@
 #! /usr/bin/env bash
 
-genai-perf profile \
+aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
     --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
     --endpoint-type chat \
@@ -1,9 +1,9 @@
 #! /usr/bin/env bash
 
-genai-perf profile \
+aiperf profile \
     -m Qwen2.5-VL-3B-Instruct \
     --tokenizer Qwen/Qwen2.5-VL-3B-Instruct \
-    --endpoint-type multimodal \
+    --endpoint-type chat \
     --random-seed 123 \
     --image-width-mean 64 \
     --image-height-mean 64 \
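Note on the two client scripts above: besides the genai-perf to aiperf rename, the multimodal example also switches --endpoint-type from multimodal to chat while keeping its image flags, i.e. aiperf drives image inputs through the chat endpoint type. As a rough illustration, a harness could issue the same invocation from Python; this is a minimal sketch using only the flags visible in these hunks (no other aiperf options are assumed here):

import subprocess

# Flags are exactly those visible in the hunks above; nothing else is assumed.
cmd = [
    "aiperf", "profile",
    "-m", "Qwen2.5-VL-3B-Instruct",
    "--tokenizer", "Qwen/Qwen2.5-VL-3B-Instruct",
    "--endpoint-type", "chat",  # was "multimodal" under genai-perf
    "--random-seed", "123",
    "--image-width-mean", "64",
    "--image-height-mean", "64",
]
subprocess.run(cmd, check=True)  # raises CalledProcessError on non-zero exit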
2 changes: 1 addition & 1 deletion examples/serve/requirements.txt
@kaiyux (Member) commented on Nov 20, 2025:

    I assume we need to update ATTRIBUTIONS-Python.md for this PR? @chzblych @NVIDIA/trt-llm-oss-compliance

@@ -1 +1 @@
-genai-perf
+aiperf
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -30,4 +30,4 @@ pytest-rerunfailures
 ruff==0.9.4
 lm_eval[api]==0.4.8
 docstring_parser
-genai-perf==0.0.13
+aiperf==0.2.0
80 changes: 40 additions & 40 deletions tests/integration/defs/stress_test/stress_test.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 """
 Stress test script for inference of model using TensorRT LLM with PyTorch/TRT backend.
-This script is used for stress testing inference performance using trtllm-serve and genai-perf.
+This script is used for stress testing inference performance using trtllm-serve and aiperf.
 
 The script supports three test modes:
 1. "stress-test": Runs performance test followed by stress test
@@ -48,9 +48,9 @@
 from defs.trt_test_alternative import (Popen, cleanup_process_tree, print_info,
                                        print_warning)
 
-# Install genai-perf in requirements-dev.txt will affect triton and pytorch version mismatch
-# def genai_perf_install():
-#     """Ensures genai-perf is installed without affecting the global environment"""
+# Installing aiperf via requirements-dev.txt can cause a triton/pytorch version mismatch
+# def aiperf_install():
+#     """Ensures aiperf is installed without affecting the global environment"""
 
 # import os
 # import subprocess
@@ -62,7 +62,7 @@
 
 # if not os.path.exists(requirements_file):
 #     with open(requirements_file, "w") as f:
-#         f.write("genai-perf\n")
+#         f.write("aiperf\n")
 
 # subprocess.check_call(
 #     [sys.executable, "-m", "pip", "install", "-r", requirements_file])
@@ -108,7 +108,7 @@ def __str__(self) -> str:
 
     @property
     def model_name(self) -> str:
-        """Extract model name from model_dir for genai-perf"""
+        """Extract model name from model_dir for aiperf"""
         return os.path.basename(self.model_dir)
 
 
@@ -149,14 +149,14 @@ class StressTestConfig:
     @property
     def request_count_stress_test(self) -> int:
         """Calculate request count for stress test"""
-        # Cannot set exact stress time in genai-perf test, WR is set the stress_time as customized value to get request count
+        # aiperf has no exact stress-duration setting; as a workaround, the customized stress_time is multiplied by the request rate to derive a request count
         stress_request_count = self.customized_stress_request_rate * self.customized_stress_time
         return stress_request_count
 
 
 @dataclass(frozen=True)
 class PerformanceParams:
-    """Dataclass to store test parameters for genai-perf"""
+    """Dataclass to store test parameters for aiperf"""
     input_len_mean: int = 64  # customized for tinyllama and llama-v3-8b-instruct-hf
     input_len_std: int = 16
     output_len_mean: int = 128  # customized for tinyllama and llama-v3-8b-instruct-hf
@@ -409,7 +409,7 @@ def stress_test(config,
                 server_config=None,
                 stress_time=None,
                 stress_timeout=None):
-    """Test LLM model performance using trtllm-serve and genai-perf.
+    """Test LLM model performance using trtllm-serve and aiperf.
 
     This function supports multiple testing modes controlled by the --test-mode option:
     - "stress-test": Runs the measure capacity stage first, then the stress stage,
@@ -426,10 +426,10 @@
         stress_time: Optional stress time in seconds, overrides the default in StressTestConfig
         stress_timeout: Optional stress timeout in seconds, overrides the default in StressTestConfig
     """
-    # Ensure genai-perf is installed
-    # genai_perf_install()
-    # Import genai-perf - needed after installation to make sure it's available
-    # import genai_perf  # noqa: F401
+    # Ensure aiperf is installed
+    # aiperf_install()
+    # Import aiperf - needed after installation to make sure it's available
+    # import aiperf  # noqa: F401
 
     # Test mode handling - determine which tests to run
     if test_mode == "stress-test":
@@ -754,7 +754,7 @@ def stress_test(config,
         os.unlink(extra_llm_options_path)
 
 
-def create_genai_perf_command(model_name,
+def create_aiperf_command(model_name,
                           model_path,
                           request_count,
                           concurrency,
@@ -764,7 +764,7 @@ def create_genai_perf_command(model_name,
                           output_len_std=PerformanceParams.output_len_std,
                           warmup_request_count=10):
     """
-    Create a command list for genai-perf with standardized parameters.
+    Create a command list for aiperf with standardized parameters.
 
     Args:
         model_name: Name of the model
@@ -778,10 +778,10 @@ def create_genai_perf_command(model_name,
         warmup_request_count: Number of warmup requests
 
     Returns:
-        List of command-line arguments for genai-perf
+        List of command-line arguments for aiperf
     """
     return [
-        "genai-perf",
+        "aiperf",
         "profile",
         "-m",
         model_name,
@@ -809,16 +809,16 @@
     ]
 
 
-def run_genai_perf_process(cmd,
+def run_aiperf_process(cmd,
                        test_start_time,
                        test_timeout,
                        server_config,
                        request_counter=None):
     """
-    Run a genai-perf process and monitor both the process and server health.
+    Run an aiperf process and monitor both the process and server health.
 
     Args:
-        cmd: Command list to execute genai-perf
+        cmd: Command list to execute aiperf
         test_start_time: Start time of the test
         test_timeout: Timeout for the test in seconds
         server_config: Server configuration object
@@ -827,7 +827,7 @@ def run_genai_perf_process(cmd,
     Returns:
         Boolean indicating whether the process completed successfully
     """
-    # Start genai-perf process with our context manager
+    # Start aiperf process with our context manager
     with launch_process(cmd,
                         start_new_session=True,
                         filter_pattern=None,
@@ -836,16 +836,16 @@ def run_genai_perf_process(cmd,
         last_health_check = time.time()
         process_completed = False
 
-        # Monitor both the server and genai-perf process
+        # Monitor both the server and aiperf process
         while process.poll() is None:
             current_time = time.time()
 
-            # Check if genai-perf is still running but exceeded timeout
+            # Check if aiperf is still running but exceeded timeout
             elapsed_time = current_time - test_start_time
             if elapsed_time > test_timeout:
                 cleanup_process_tree(process, has_session=True)
                 raise RuntimeError(
-                    f"genai-perf test timed out after {test_timeout} seconds")
+                    f"aiperf test timed out after {test_timeout} seconds")
 
             # Check server health periodically
             if current_time - last_health_check > server_config.health_check_timeout:
@@ -869,20 +869,20 @@ def run_genai_perf_process(cmd,
 
             time.sleep(0.5)
 
-        # Check final status of genai-perf process
+        # Check final status of aiperf process
         retcode = process.poll()
         if retcode is not None:
             if retcode != 0:
                 cleanup_process_tree(process, has_session=True)
                 raise RuntimeError(
-                    f"genai-perf exited with non-zero code: {retcode}")
+                    f"aiperf exited with non-zero code: {retcode}")
             else:
-                print_info("genai-perf completed successfully")
+                print_info("aiperf completed successfully")
                 process_completed = True
         else:
             cleanup_process_tree(process, has_session=True)
             raise RuntimeError(
-                "genai-perf did not complete normally, will terminate")
+                "aiperf did not complete normally, will terminate")
 
     return process_completed
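The function above follows a launch-and-monitor pattern: poll the aiperf child every 0.5 s, enforce the overall test timeout, and interleave periodic server health checks. For orientation, here is a simplified, self-contained sketch of the same pattern, using plain subprocess.Popen in place of the repo's launch_process context manager, with the health checks and process-tree cleanup elided:

import subprocess
import time

def run_with_timeout(cmd, timeout_s, poll_interval_s=0.5):
    """Poll a child process until it exits or the deadline passes."""
    proc = subprocess.Popen(cmd)
    start = time.time()
    while proc.poll() is None:  # None means the child is still running
        if time.time() - start > timeout_s:
            proc.kill()  # stand-in for cleanup_process_tree
            raise RuntimeError(f"{cmd[0]} timed out after {timeout_s} seconds")
        time.sleep(poll_interval_s)  # same 0.5 s cadence as the test
    if proc.returncode != 0:
        raise RuntimeError(f"{cmd[0]} exited with non-zero code: {proc.returncode}")
    return True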

@@ -921,8 +921,8 @@ def measure_capacity_stage(model_name,
         f"Running test {test_index+1}/{total_tests}: concurrency={concurrency}, request_count={request_count}"
     )
 
-    # Prepare genai-perf command
-    cmd = create_genai_perf_command(
+    # Prepare aiperf command
+    cmd = create_aiperf_command(
         model_name=model_name,
         model_path=model_path,
         request_count=request_count,
@@ -933,8 +933,8 @@ def measure_capacity_stage(model_name,
         output_len_std=performance_params.output_len_std,
         warmup_request_count=10)
 
-    # Run genai-perf process
-    process_completed = run_genai_perf_process(
+    # Run aiperf process
+    process_completed = run_aiperf_process(
         cmd, test_start_time, performance_params.test_timeout,
         server_config, request_counter)

@@ -1016,8 +1016,8 @@ def stress_stage(model_name,
     if request_counter:
         request_counter.reset()
 
-    # Prepare genai-perf command
-    cmd = create_genai_perf_command(
+    # Prepare aiperf command
+    cmd = create_aiperf_command(
         model_name=model_name,
         model_path=model_path,
         request_count=request_count,
@@ -1028,8 +1028,8 @@ def stress_stage(model_name,
         output_len_std=PerformanceParams.output_len_std,
         warmup_request_count=10)
 
-    # Start genai-perf process
-    process_completed = run_genai_perf_process(cmd, test_start_time,
+    # Start aiperf process
+    process_completed = run_aiperf_process(cmd, test_start_time,
                                            test_timeout, server_config,
                                            request_counter)

@@ -1183,14 +1183,14 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
         artifacts_dir (str): Path to the artifacts directory
         current_model (str, optional): If provided, only analyze artifacts for this model
     """
-    # Find all profile_export_genai_perf.json files in the artifacts directory
+    # Find all profile_export_aiperf.json files in the artifacts directory
    json_files = glob(os.path.join(artifacts_dir,
-                                   "**/profile_export_genai_perf.json"),
+                                   "**/profile_export_aiperf.json"),
                       recursive=True)
 
     if not json_files:
         raise RuntimeError(
-            "No profile_export_genai_perf.json files found in the artifacts directory"
+            "No profile_export_aiperf.json files found in the artifacts directory"
         )
 
     # Get a list of directory names in the artifacts directory
@@ -1308,7 +1308,7 @@ def extract_stress_test_metrics(artifacts_dir="./artifacts",
                 range_val = max_val - min_val
                 if range_val == 0:
                     raise ValueError(
-                        "Please check OutputTokenThroughput from genai-perf")
+                        "Please check OutputTokenThroughput from aiperf")
                 else:
                     normalized_df.loc[
                         normalized_df["Model"] == model_name,
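The final hunk touches the metric normalization in extract_stress_test_metrics: per-model OutputTokenThroughput values are min-max rescaled, and a zero range (every run produced identical throughput) is treated as an error. A minimal pandas sketch of the same computation on a hypothetical frame:

import pandas as pd

# Hypothetical frame with the two columns the test reads.
df = pd.DataFrame({
    "Model": ["m1", "m1", "m1"],
    "OutputTokenThroughput": [90.0, 100.0, 110.0],
})

col = "OutputTokenThroughput"
min_val, max_val = df[col].min(), df[col].max()
range_val = max_val - min_val
if range_val == 0:
    # All runs identical; the stress test treats this as an error.
    raise ValueError("Please check OutputTokenThroughput from aiperf")
df[col + "_normalized"] = (df[col] - min_val) / range_val
print(df)  # normalized values: 0.0, 0.5, 1.0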
2 changes: 1 addition & 1 deletion tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -54,7 +54,7 @@ def example_root():
     ("python3", "openai_completion_client_json_schema.py"),
     ("bash", "curl_chat_client.sh"),
     ("bash", "curl_completion_client.sh"),
-    ("bash", "genai_perf_client.sh")])
+    ("bash", "aiperf_client.sh")])
 def test_trtllm_serve_examples(exe: str, script: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)
@@ -60,7 +60,7 @@ def example_root():
 
 @pytest.mark.parametrize("exe, script",
                          [("python3", "openai_chat_client_for_multimodal.py"),
-                          ("bash", "genai_perf_client_for_multimodal.sh")])
+                          ("bash", "aiperf_client_for_multimodal.sh")])
 def test_trtllm_serve_examples(exe: str, script: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)