
Commit 03c3f26

Author: Andrew Xia
Commit message: getting review ready
Signed-off-by: Andrew Xia <[email protected]>
Parent: c707a49

File tree: 5 files changed, +108 −21 lines


tests/entrypoints/openai/test_response_api_parsable_context.py

Lines changed: 86 additions & 0 deletions
@@ -85,3 +85,89 @@ async def test_reasoning_and_function_items(client: OpenAI, model_name: str):
     assert response.output[0].type == "reasoning"
     assert response.output[1].type == "message"
     assert type(response.output[1].content[0].text) is str
+
+
+# def get_horoscope(sign):
+#     return f"{sign}: Next Tuesday you will befriend a baby otter."
+
+# def call_function(name, args):
+#     if name == "get_horoscope":
+#         return get_horoscope(**args)
+#     else:
+#         raise ValueError(f"Unknown function: {name}")
+
+
+# TODO: test function tool call
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling(client: OpenAI, model_name: str):
+    tools = [
+        {
+            "type": "function",
+            "name": "get_horoscope",
+            "description": "Get today's horoscope for an astrological sign.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "sign": {"type": "string"},
+                },
+                "required": ["sign"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        }
+    ]
+
+    response = await client.responses.create(
+        model=model_name,
+        input="What is the horoscope for Aquarius today?",
+        tools=tools,
+        temperature=0.0,
+    )
+    assert response is not None
+    assert response.status == "completed"
+    assert len(response.output) == 2
+    assert response.output[0].type == "reasoning"
+    assert response.output[1].type == "function_call"
+
+    function_call = response.output[1]
+    assert function_call.name == "get_horoscope"
+    assert function_call.call_id is not None
+
+    # name = function_call.name
+
+    # args = json.loads(function_call.arguments)
+    # assert "sign" in args
+
+    # # Step 3: Call the get_horoscope function
+    # result = call_function(function_call.name, args)
+
+    # response_2 = await client.responses.create(
+    #     model=model_name,
+    #     input=[
+    #         {
+    #             "type": "function_call_output",
+    #             "call_id": function_call.call_id,
+    #             "output": str(result),
+    #         }
+    #     ],
+    #     tools=tools,
+    #     previous_response_id=response.id,
+    # )
+    # assert response_2 is not None
+    # assert response_2.status == "completed"
+    # assert response_2.output_text is not None
+
+    # # NOTE: chain-of-thought should be removed.
+    # response_3 = await client.responses.create(
+    #     model=model_name,
+    #     input="What's the weather like in Paris today?",
+    #     tools=tools,
+    #     previous_response_id=response_2.id,
+    # )
+    # assert response_3 is not None
+    # assert response_3.status == "completed"
+    # assert response_3.output_text is not None
+
+
+# TODO: test MCP tool call
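
The commented-out tail of this test sketches the follow-up turn that is not yet enabled. For reference, a minimal standalone version of that round trip, mirroring the commented code above (the get_horoscope helper and json usage come straight from it; complete_tool_turn is a hypothetical wrapper name, not part of the test):

    import json


    def get_horoscope(sign):
        return f"{sign}: Next Tuesday you will befriend a baby otter."


    async def complete_tool_turn(client, model_name, tools, response):
        # The first turn ends with a function_call item; run the tool locally.
        function_call = response.output[1]
        args = json.loads(function_call.arguments)
        result = get_horoscope(**args)

        # Send the tool output back, keyed by call_id and chained via
        # previous_response_id, so the server can finish the turn.
        return await client.responses.create(
            model=model_name,
            input=[
                {
                    "type": "function_call_output",
                    "call_id": function_call.call_id,
                    "output": str(result),
                }
            ],
            tools=tools,
            previous_response_id=response.id,
        )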

vllm/entrypoints/context.py

Lines changed: 2 additions & 10 deletions
@@ -254,7 +254,7 @@ def append_tool_output(self, output: list[ResponseInputOutputItem]) -> None:
     def need_builtin_tool_call(self) -> bool:
         """Return true if the last message is a MCP tool call"""
         last_message = self.parser.response_messages[-1]
-        # HACK: figure out which tools are MCP tools
+        # TODO: figure out which tools are MCP tools
         if (  # noqa: SIM103
             last_message.type == "function_call"
             and (
@@ -297,15 +297,7 @@ async def call_tool(self) -> list[ResponseInputOutputItem]:
             return []

     def render_for_completion(self):
-        return [
-            self.request,
-            self.tokenizer,
-            self.parser.response_messages,
-            self.tool_dicts,
-            self.tool_parser_cls,
-            self.chat_template,
-            self.chat_template_content_format,
-        ]
+        raise NotImplementedError("Should not be called.")

     async def init_tool_sessions(
         self,

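Note: the seven values the old render_for_completion bundled into a list correspond one-for-one to the parameters of _render_next_turn in serving_engine.py (next file), so prompt rendering for a ParsableContext appears to move into the serving layer; calling render_for_completion on this context is now an explicit error.
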
vllm/entrypoints/openai/serving_engine.py

Lines changed: 12 additions & 8 deletions
@@ -23,6 +23,11 @@
     ParsableContext,
     StreamingHarmonyContext,
 )
+from vllm.entrypoints.openai.protocol import (
+    FunctionCall,
+    ResponseInputOutputItem,
+    ResponsesRequest,
+)
 from vllm.entrypoints.pooling.classify.protocol import (
     ClassificationChatRequest,
     ClassificationCompletionRequest,
@@ -44,6 +49,7 @@
     ScoreRequest,
     ScoreResponse,
 )
+from vllm.transformers_utils.tokenizer import AnyTokenizer

 if sys.version_info >= (3, 12):
     from typing import TypedDict
@@ -77,11 +83,9 @@
     DetokenizeRequest,
     ErrorInfo,
     ErrorResponse,
-    FunctionCall,
     FunctionDefinition,
     GenerateRequest,
     GenerateResponse,
-    ResponsesRequest,
     TokenizeChatRequest,
     TokenizeCompletionRequest,
     TokenizeResponse,
@@ -1234,13 +1238,13 @@ async def _process_inputs(

     async def _render_next_turn(
         self,
-        request,
-        tokenizer,
-        messages,
-        tool_dicts,
+        request: ResponsesRequest,
+        tokenizer: AnyTokenizer,
+        messages: list[ResponseInputOutputItem],
+        tool_dicts: list[dict[str, Any]] | None,
         tool_parser,
-        chat_template,
-        chat_template_content_format,
+        chat_template: str | None,
+        chat_template_content_format: ChatTemplateContentFormatOption,
     ):
         new_messages = construct_input_messages(
             request_input=messages,
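
Note: FunctionCall and ResponsesRequest move out of the aggregated protocol import block into their own import, and AnyTokenizer is newly imported, because these names now back the type annotations added to _render_next_turn above.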

vllm/entrypoints/openai/serving_responses.py

Lines changed: 1 addition & 2 deletions
@@ -12,7 +12,6 @@
 from http import HTTPStatus
 from typing import Final

-import fbvscode
 import jinja2
 from fastapi import Request
 from openai.types.responses import (
@@ -310,7 +309,7 @@ async def create_responses(
         | ResponsesResponse
         | ErrorResponse
     ):
-        fbvscode.set_trace()
+        # fbvscode.set_trace()
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
             logger.error("Error with model %s", error_check_ret)

vllm/entrypoints/responses_utils.py

Lines changed: 7 additions & 1 deletion
@@ -38,12 +38,18 @@ def make_response_output_items_from_parsable_context(
         if not isinstance(message, ResponseFunctionToolCallOutputItem):
             output_messages.append(message)
         else:
+            if len(output_messages) == 0:
+                raise ValueError(
+                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
+                )
             if isinstance(output_messages[-1], ResponseFunctionToolCall):
                 mcp_message = McpCall(
                     id=f"mcp_{random_uuid()}",
                     arguments=output_messages[-1].arguments,
                     name=output_messages[-1].name,
-                    server_label=output_messages[-1].name,  # TODO
+                    server_label=output_messages[
+                        -1
+                    ].name,  # TODO: store the server label
                     type="mcp_call",
                     status="completed",
                     output=message.output,

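The new guard makes the ordering invariant explicit: a function-call output item is only valid directly after the function call it answers, because the pair is folded into a single mcp_call item. A self-contained sketch of that invariant, using hypothetical stand-in types rather than vLLM's (the real code uses ResponseFunctionToolCall, ResponseFunctionToolCallOutputItem, and McpCall):

    from dataclasses import dataclass


    @dataclass
    class ToolCall:
        name: str
        arguments: str


    @dataclass
    class ToolCallOutput:
        output: str


    @dataclass
    class MergedCall:
        name: str
        arguments: str
        output: str


    def collapse(messages: list) -> list:
        out: list = []
        for message in messages:
            if not isinstance(message, ToolCallOutput):
                out.append(message)
                continue
            # An output with no preceding call has nothing to attach to.
            if not out or not isinstance(out[-1], ToolCall):
                raise ValueError(
                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
                )
            call = out.pop()
            out.append(MergedCall(call.name, call.arguments, message.output))
        return out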