ai-dynamo · jasonqinzhou · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
diff --git a/.gitignore b/.gitignore
@@ -77,6 +77,9 @@ llm_engine.h
 ### Ruff ###
 .ruff_cache/
 
+### MyPy ###
+.mypy_cache/
+
 ### Python ###
 __pycache__/
 *.py[cod]

@@ -123,7 +123,8 @@ def convert_config(
             args = break_arguments(args)
 
             # remove --is-prefill-worker flag
-            args.remove("--is-prefill-worker")
+            if "--is-prefill-worker" in args:
+                args.remove("--is-prefill-worker")
 
             # disable prefix caching
             if "--enable-prefix-caching" in args:

diff --git a/benchmarks/pyproject.toml b/benchmarks/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
 ]
 
 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@release/0.4.0",
     "networkx",
     "pandas",
     "pydantic>=2",

@@ -37,7 +37,7 @@ class TestProfileSlaAiconfigurator:
     """Test class for profile_sla aiconfigurator functionality."""
 
     @pytest.fixture
-    def trtllm_args(self, request):
+    def llm_args(self, request):
         class Args:
             def __init__(self):
                 self.model = ""
@@ -80,12 +80,12 @@ def __init__(self):
     @pytest.mark.parallel
     @pytest.mark.asyncio
     @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
-    async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
+    async def test_aiconfigurator_missing_args(self, llm_args, missing_arg):
         # Check that validation error happens when a required arg is missing.
         # Note: aic_backend_version is optional - when None, auto-detects latest version
-        setattr(trtllm_args, missing_arg, None)
+        setattr(llm_args, missing_arg, None)
         with pytest.raises(ValueError):
-            await run_profile(trtllm_args)
+            await run_profile(llm_args)
 
     @pytest.mark.pre_merge
     @pytest.mark.parallel
@@ -98,19 +98,19 @@ async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
             ("aic_backend_version", "0.1.0"),
         ],
     )
-    async def test_aiconfiguator_no_data(self, trtllm_args, arg_name, bad_value):
+    async def test_aiconfigurator_no_data(self, llm_args, arg_name, bad_value):
         # Check that an appropriate error is raised when the system/model/backend
         # is not found in the aiconfigurator database.
-        setattr(trtllm_args, arg_name, bad_value)
+        setattr(llm_args, arg_name, bad_value)
         with pytest.raises(ValueError, match="Database not found"):
-            await run_profile(trtllm_args)
+            await run_profile(llm_args)
 
     @pytest.mark.pre_merge
     @pytest.mark.parallel
     @pytest.mark.asyncio
-    async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
-        # Test that profile_sla works with the model & backend in the trtllm_args fixture.
-        await run_profile(trtllm_args)
+    async def test_trtllm_aiconfigurator_single_model(self, llm_args):
+        # Test that profile_sla works with the model & backend in the llm_args fixture.
+        await run_profile(llm_args)
 
     @pytest.mark.parallel
     @pytest.mark.asyncio
@@ -120,6 +120,10 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
             ("trtllm", None),
             ("trtllm", "0.20.0"),
             ("trtllm", "1.0.0rc3"),
+            ("vllm", None),
+            ("vllm", "0.11.0"),
+            ("sglang", None),
+            ("sglang", "0.5.1.post1"),
         ],
     )
     @pytest.mark.parametrize(
@@ -129,11 +133,11 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
             "meta-llama/Llama-3.1-405B",
         ],
     )
-    async def test_trtllm_aiconfigurator_many(
-        self, trtllm_args, hf_model_id, backend, aic_backend_version
+    async def test_aiconfigurator_dense_models(
+        self, llm_args, hf_model_id, backend, aic_backend_version
     ):
         # Test that profile_sla works with a variety of backend versions and model names.
-        trtllm_args.aic_hf_id = hf_model_id
-        trtllm_args.backend = backend
-        trtllm_args.aic_backend_version = aic_backend_version
-        await run_profile(trtllm_args)
+        llm_args.aic_hf_id = hf_model_id
+        llm_args.backend = backend
+        llm_args.aic_backend_version = aic_backend_version
+        await run_profile(llm_args)