diff --git a/tests/test_server.py b/tests/test_server.py index 9fb86a3e..81d74b95 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -167,6 +167,41 @@ def test_basic_completion_request(self): assert request.max_tokens is None # uses _default_max_tokens when None +class TestServeCli: + """Test serve CLI argument parsing.""" + + def test_tool_call_parser_accepts_harmony_aliases(self): + """GPT-OSS/Harmony parsers should be selectable from the serve CLI.""" + from vllm_mlx.cli import create_parser + + parser = create_parser() + args = parser.parse_args( + [ + "serve", + "lmstudio-community/gpt-oss-20b-MLX-8bit", + "--enable-auto-tool-choice", + "--tool-call-parser", + "harmony", + ] + ) + + assert args.command == "serve" + assert args.tool_call_parser == "harmony" + assert args.enable_auto_tool_choice is True + + args = parser.parse_args( + [ + "serve", + "lmstudio-community/gpt-oss-20b-MLX-8bit", + "--enable-auto-tool-choice", + "--tool-call-parser", + "gpt-oss", + ] + ) + + assert args.tool_call_parser == "gpt-oss" + + # ============================================================================= # Helper Function Tests # ============================================================================= diff --git a/vllm_mlx/cli.py b/vllm_mlx/cli.py index 8a90bc9b..7f9ea088 100644 --- a/vllm_mlx/cli.py +++ b/vllm_mlx/cli.py @@ -593,7 +593,8 @@ def bench_kv_cache_command(args): ) -def main(): +def create_parser() -> argparse.ArgumentParser: + """Build the top-level CLI parser.""" parser = argparse.ArgumentParser( description="vllm-mlx: Apple Silicon MLX backend for vLLM", formatter_class=argparse.RawDescriptionHelpFormatter, @@ -832,6 +833,8 @@ def main(): "qwen3_coder", "llama", "hermes", + "harmony", + "gpt-oss", "deepseek", "kimi", "granite", @@ -843,7 +846,8 @@ def main(): help=( "Select the tool call parser for the model. Options: " "auto (auto-detect), mistral, qwen, qwen3_coder, llama, hermes, " - "deepseek, kimi, granite, nemotron, xlam, functionary, glm47. " + "harmony, gpt-oss, deepseek, kimi, granite, nemotron, xlam, " + "functionary, glm47. " "Required for --enable-auto-tool-choice." ), ) @@ -1023,6 +1027,12 @@ def main(): help="Quantization group size (default: 64)", ) + return parser + + +def main(): + parser = create_parser() + args = parser.parse_args() if args.command == "serve":