Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,41 @@ def test_basic_completion_request(self):
assert request.max_tokens is None # uses _default_max_tokens when None


class TestServeCli:
    """Test serve CLI argument parsing."""

    def test_tool_call_parser_accepts_harmony_aliases(self):
        """GPT-OSS/Harmony parsers should be selectable from the serve CLI.

        Both the "harmony" and "gpt-oss" spellings must be accepted by
        --tool-call-parser when combined with --enable-auto-tool-choice.
        """
        from vllm_mlx.cli import create_parser

        parser = create_parser()

        # Exercise each alias through an identical invocation and assert
        # the full contract for both (the original test only checked
        # command/flag state for the first alias).
        for alias in ("harmony", "gpt-oss"):
            args = parser.parse_args(
                [
                    "serve",
                    "lmstudio-community/gpt-oss-20b-MLX-8bit",
                    "--enable-auto-tool-choice",
                    "--tool-call-parser",
                    alias,
                ]
            )

            assert args.command == "serve"
            assert args.tool_call_parser == alias
            assert args.enable_auto_tool_choice is True


# =============================================================================
# Helper Function Tests
# =============================================================================
Expand Down
14 changes: 12 additions & 2 deletions vllm_mlx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,8 @@ def bench_kv_cache_command(args):
)


def main():
def create_parser() -> argparse.ArgumentParser:
"""Build the top-level CLI parser."""
parser = argparse.ArgumentParser(
description="vllm-mlx: Apple Silicon MLX backend for vLLM",
formatter_class=argparse.RawDescriptionHelpFormatter,
Expand Down Expand Up @@ -832,6 +833,8 @@ def main():
"qwen3_coder",
"llama",
"hermes",
"harmony",
"gpt-oss",
"deepseek",
"kimi",
"granite",
Expand All @@ -843,7 +846,8 @@ def main():
help=(
"Select the tool call parser for the model. Options: "
"auto (auto-detect), mistral, qwen, qwen3_coder, llama, hermes, "
"deepseek, kimi, granite, nemotron, xlam, functionary, glm47. "
"harmony, gpt-oss, deepseek, kimi, granite, nemotron, xlam, "
"functionary, glm47. "
"Required for --enable-auto-tool-choice."
),
)
Expand Down Expand Up @@ -1023,6 +1027,12 @@ def main():
help="Quantization group size (default: 64)",
)

return parser


def main():
parser = create_parser()

args = parser.parse_args()

if args.command == "serve":
Expand Down