Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,41 @@ def test_basic_completion_request(self):
assert request.max_tokens is None # uses _default_max_tokens when None


class TestServeCli:
    """Test serve CLI argument parsing."""

    def test_tool_call_parser_accepts_harmony_aliases(self):
        """GPT-OSS/Harmony parsers should be selectable from the serve CLI.

        Both the "harmony" and "gpt-oss" spellings must be accepted by
        --tool-call-parser when combined with --enable-auto-tool-choice.
        """
        from vllm_mlx.cli import create_parser

        parser = create_parser()

        # Exercise each alias through an identical invocation and assert
        # the full contract for both (the original test only checked
        # command/flag state for the first alias).
        for alias in ("harmony", "gpt-oss"):
            args = parser.parse_args(
                [
                    "serve",
                    "lmstudio-community/gpt-oss-20b-MLX-8bit",
                    "--enable-auto-tool-choice",
                    "--tool-call-parser",
                    alias,
                ]
            )

            assert args.command == "serve"
            assert args.tool_call_parser == alias
            assert args.enable_auto_tool_choice is True


# =============================================================================
# Helper Function Tests
# =============================================================================
Expand Down
14 changes: 12 additions & 2 deletions vllm_mlx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,8 @@ def bench_kv_cache_command(args):
)


def main():
def create_parser() -> argparse.ArgumentParser:
"""Build the top-level CLI parser."""
parser = argparse.ArgumentParser(
description="vllm-mlx: Apple Silicon MLX backend for vLLM",
formatter_class=argparse.RawDescriptionHelpFormatter,
Expand Down Expand Up @@ -832,6 +833,8 @@ def main():
"qwen3_coder",
"llama",
"hermes",
"harmony",
"gpt-oss",
"deepseek",
"kimi",
"granite",
Expand All @@ -843,7 +846,8 @@ def main():
help=(
"Select the tool call parser for the model. Options: "
"auto (auto-detect), mistral, qwen, qwen3_coder, llama, hermes, "
"deepseek, kimi, granite, nemotron, xlam, functionary, glm47. "
"harmony, gpt-oss, deepseek, kimi, granite, nemotron, xlam, "
"functionary, glm47. "
"Required for --enable-auto-tool-choice."
),
)
Expand Down Expand Up @@ -1023,6 +1027,12 @@ def main():
help="Quantization group size (default: 64)",
)

return parser


def main():
parser = create_parser()

args = parser.parse_args()

if args.command == "serve":
Expand Down