Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ llm_engine.h
### Ruff ###
.ruff_cache/

### MyPy ###
.mypy_cache/

### Python ###
__pycache__/
*.py[cod]
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/profiler/utils/config_modifiers/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ def convert_config(
args = break_arguments(args)

# remove --is-prefill-worker flag
args.remove("--is-prefill-worker")
if "--is-prefill-worker" in args:
args.remove("--is-prefill-worker")

# disable prefix caching
if "--enable-prefix-caching" in args:
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ classifiers = [
]

dependencies = [
"aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
"aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@release/0.4.0",
"networkx",
"pandas",
"pydantic>=2",
Expand Down
36 changes: 20 additions & 16 deletions tests/profiler/test_profile_sla_aiconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class TestProfileSlaAiconfigurator:
"""Test class for profile_sla aiconfigurator functionality."""

@pytest.fixture
def trtllm_args(self, request):
def llm_args(self, request):
class Args:
def __init__(self):
self.model = ""
Expand Down Expand Up @@ -80,12 +80,12 @@ def __init__(self):
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
async def test_aiconfigurator_missing_args(self, llm_args, missing_arg):
# Check that validation error happens when a required arg is missing.
# Note: aic_backend_version is optional - when None, auto-detects latest version
setattr(trtllm_args, missing_arg, None)
setattr(llm_args, missing_arg, None)
with pytest.raises(ValueError):
await run_profile(trtllm_args)
await run_profile(llm_args)

@pytest.mark.pre_merge
@pytest.mark.parallel
Expand All @@ -98,19 +98,19 @@ async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
("aic_backend_version", "0.1.0"),
],
)
async def test_aiconfiguator_no_data(self, trtllm_args, arg_name, bad_value):
async def test_aiconfigurator_no_data(self, llm_args, arg_name, bad_value):
# Check that an appropriate error is raised when the system/model/backend
# is not found in the aiconfigurator database.
setattr(trtllm_args, arg_name, bad_value)
setattr(llm_args, arg_name, bad_value)
with pytest.raises(ValueError, match="Database not found"):
await run_profile(trtllm_args)
await run_profile(llm_args)

@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
# Test that profile_sla works with the model & backend in the trtllm_args fixture.
await run_profile(trtllm_args)
async def test_trtllm_aiconfigurator_single_model(self, llm_args):
# Test that profile_sla works with the model & backend in the llm_args fixture.
await run_profile(llm_args)

@pytest.mark.parallel
@pytest.mark.asyncio
Expand All @@ -120,6 +120,10 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
("trtllm", None),
("trtllm", "0.20.0"),
("trtllm", "1.0.0rc3"),
("vllm", None),
("vllm", "0.11.0"),
("sglang", None),
("sglang", "0.5.1.post1"),
],
)
@pytest.mark.parametrize(
Expand All @@ -129,11 +133,11 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
"meta-llama/Llama-3.1-405B",
],
)
async def test_trtllm_aiconfigurator_many(
self, trtllm_args, hf_model_id, backend, aic_backend_version
async def test_aiconfigurator_dense_models(
self, llm_args, hf_model_id, backend, aic_backend_version
):
# Test that profile_sla works with a variety of backend versions and model names.
trtllm_args.aic_hf_id = hf_model_id
trtllm_args.backend = backend
trtllm_args.aic_backend_version = aic_backend_version
await run_profile(trtllm_args)
llm_args.aic_hf_id = hf_model_id
llm_args.backend = backend
llm_args.aic_backend_version = aic_backend_version
await run_profile(llm_args)
Loading