
Commit 03c3f26

Author: Andrew Xia
Commit message: getting review ready
Signed-off-by: Andrew Xia <[email protected]>
Parent: c707a49

File tree: 5 files changed, +108 −21 lines


tests/entrypoints/openai/test_response_api_parsable_context.py

Lines changed: 86 additions & 0 deletions
@@ -85,3 +85,89 @@ async def test_reasoning_and_function_items(client: OpenAI, model_name: str):
     assert response.output[0].type == "reasoning"
     assert response.output[1].type == "message"
     assert type(response.output[1].content[0].text) is str
+
+
+# def get_horoscope(sign):
+#     return f"{sign}: Next Tuesday you will befriend a baby otter."
+
+# def call_function(name, args):
+#     if name == "get_horoscope":
+#         return get_horoscope(**args)
+#     else:
+#         raise ValueError(f"Unknown function: {name}")
+
+
+# TODO: test function tool call
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling(client: OpenAI, model_name: str):
+    tools = [
+        {
+            "type": "function",
+            "name": "get_horoscope",
+            "description": "Get today's horoscope for an astrological sign.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "sign": {"type": "string"},
+                },
+                "required": ["sign"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        }
+    ]
+
+    response = await client.responses.create(
+        model=model_name,
+        input="What is the horoscope for Aquarius today?",
+        tools=tools,
+        temperature=0.0,
+    )
+    assert response is not None
+    assert response.status == "completed"
+    assert len(response.output) == 2
+    assert response.output[0].type == "reasoning"
+    assert response.output[1].type == "function_call"
+
+    function_call = response.output[1]
+    assert function_call.name == "get_horoscope"
+    assert function_call.call_id is not None
+
+    # name = function_call.name
+
+    # args = json.loads(function_call.arguments)
+    # assert "sign" in args
+
+    # # Step 3: Call the get_horoscope function
+    # result = call_function(function_call.name, args)
+
+    # response_2 = await client.responses.create(
+    #     model=model_name,
+    #     input=[
+    #         {
+    #             "type": "function_call_output",
+    #             "call_id": function_call.call_id,
+    #             "output": str(result),
+    #         }
+    #     ],
+    #     tools=tools,
+    #     previous_response_id=response.id,
+    # )
+    # assert response_2 is not None
+    # assert response_2.status == "completed"
+    # assert response_2.output_text is not None
+
+    # # NOTE: chain-of-thought should be removed.
+    # response_3 = await client.responses.create(
+    #     model=model_name,
+    #     input="What's the weather like in Paris today?",
+    #     tools=tools,
+    #     previous_response_id=response_2.id,
+    # )
+    # assert response_3 is not None
+    # assert response_3.status == "completed"
+    # assert response_3.output_text is not None
+
+
+# TODO: test MCP tool call
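
The commented-out tail of this test sketches the follow-up turn that is not yet enabled. For reference, a minimal standalone version of that round trip, mirroring the commented code above (the get_horoscope helper and json usage come straight from it; complete_tool_turn is a hypothetical wrapper name, not part of the test):

    import json


    def get_horoscope(sign):
        return f"{sign}: Next Tuesday you will befriend a baby otter."


    async def complete_tool_turn(client, model_name, tools, response):
        # The first turn ends with a function_call item; run the tool locally.
        function_call = response.output[1]
        args = json.loads(function_call.arguments)
        result = get_horoscope(**args)

        # Send the tool output back, keyed by call_id and chained via
        # previous_response_id, so the server can finish the turn.
        return await client.responses.create(
            model=model_name,
            input=[
                {
                    "type": "function_call_output",
                    "call_id": function_call.call_id,
                    "output": str(result),
                }
            ],
            tools=tools,
            previous_response_id=response.id,
        )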

vllm/entrypoints/context.py

Lines changed: 2 additions & 10 deletions
@@ -254,7 +254,7 @@ def append_tool_output(self, output: list[ResponseInputOutputItem]) -> None:
     def need_builtin_tool_call(self) -> bool:
         """Return true if the last message is a MCP tool call"""
         last_message = self.parser.response_messages[-1]
-        # HACK: figure out which tools are MCP tools
+        # TODO: figure out which tools are MCP tools
         if (  # noqa: SIM103
             last_message.type == "function_call"
             and (
@@ -297,15 +297,7 @@ async def call_tool(self) -> list[ResponseInputOutputItem]:
             return []

     def render_for_completion(self):
-        return [
-            self.request,
-            self.tokenizer,
-            self.parser.response_messages,
-            self.tool_dicts,
-            self.tool_parser_cls,
-            self.chat_template,
-            self.chat_template_content_format,
-        ]
+        raise NotImplementedError("Should not be called.")

     async def init_tool_sessions(
         self,

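Note: the seven values the old render_for_completion bundled into a list correspond one-for-one to the parameters of _render_next_turn in serving_engine.py (next file), so prompt rendering for a ParsableContext appears to move into the serving layer; calling render_for_completion on this context is now an explicit error.
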
vllm/entrypoints/openai/serving_engine.py

Lines changed: 12 additions & 8 deletions
@@ -23,6 +23,11 @@
     ParsableContext,
     StreamingHarmonyContext,
 )
+from vllm.entrypoints.openai.protocol import (
+    FunctionCall,
+    ResponseInputOutputItem,
+    ResponsesRequest,
+)
 from vllm.entrypoints.pooling.classify.protocol import (
     ClassificationChatRequest,
     ClassificationCompletionRequest,
@@ -44,6 +49,7 @@
     ScoreRequest,
     ScoreResponse,
 )
+from vllm.transformers_utils.tokenizer import AnyTokenizer

 if sys.version_info >= (3, 12):
     from typing import TypedDict
@@ -77,11 +83,9 @@
     DetokenizeRequest,
     ErrorInfo,
     ErrorResponse,
-    FunctionCall,
     FunctionDefinition,
     GenerateRequest,
     GenerateResponse,
-    ResponsesRequest,
     TokenizeChatRequest,
     TokenizeCompletionRequest,
     TokenizeResponse,
@@ -1234,13 +1238,13 @@ async def _process_inputs(

     async def _render_next_turn(
         self,
-        request,
-        tokenizer,
-        messages,
-        tool_dicts,
+        request: ResponsesRequest,
+        tokenizer: AnyTokenizer,
+        messages: list[ResponseInputOutputItem],
+        tool_dicts: list[dict[str, Any]] | None,
         tool_parser,
-        chat_template,
-        chat_template_content_format,
+        chat_template: str | None,
+        chat_template_content_format: ChatTemplateContentFormatOption,
     ):
         new_messages = construct_input_messages(
             request_input=messages,
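
Note: FunctionCall and ResponsesRequest move out of the aggregated protocol import block into their own import, and AnyTokenizer is newly imported, because these names now back the type annotations added to _render_next_turn above.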

vllm/entrypoints/openai/serving_responses.py

Lines changed: 1 addition & 2 deletions
@@ -12,7 +12,6 @@
 from http import HTTPStatus
 from typing import Final

-import fbvscode
 import jinja2
 from fastapi import Request
 from openai.types.responses import (
@@ -310,7 +309,7 @@ async def create_responses(
         | ResponsesResponse
         | ErrorResponse
     ):
-        fbvscode.set_trace()
+        # fbvscode.set_trace()
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
             logger.error("Error with model %s", error_check_ret)

vllm/entrypoints/responses_utils.py

Lines changed: 7 additions & 1 deletion
@@ -38,12 +38,18 @@ def make_response_output_items_from_parsable_context(
         if not isinstance(message, ResponseFunctionToolCallOutputItem):
             output_messages.append(message)
         else:
+            if len(output_messages) == 0:
+                raise ValueError(
+                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
+                )
             if isinstance(output_messages[-1], ResponseFunctionToolCall):
                 mcp_message = McpCall(
                     id=f"mcp_{random_uuid()}",
                     arguments=output_messages[-1].arguments,
                     name=output_messages[-1].name,
-                    server_label=output_messages[-1].name,  # TODO
+                    server_label=output_messages[
+                        -1
+                    ].name,  # TODO: store the server label
                     type="mcp_call",
                     status="completed",
                     output=message.output,

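The new guard makes the ordering invariant explicit: a function-call output item is only valid directly after the function call it answers, because the pair is folded into a single mcp_call item. A self-contained sketch of that invariant, using hypothetical stand-in types rather than vLLM's (the real code uses ResponseFunctionToolCall, ResponseFunctionToolCallOutputItem, and McpCall):

    from dataclasses import dataclass


    @dataclass
    class ToolCall:
        name: str
        arguments: str


    @dataclass
    class ToolCallOutput:
        output: str


    @dataclass
    class MergedCall:
        name: str
        arguments: str
        output: str


    def collapse(messages: list) -> list:
        out: list = []
        for message in messages:
            if not isinstance(message, ToolCallOutput):
                out.append(message)
                continue
            # An output with no preceding call has nothing to attach to.
            if not out or not isinstance(out[-1], ToolCall):
                raise ValueError(
                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
                )
            call = out.pop()
            out.append(MergedCall(call.name, call.arguments, message.output))
        return out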