[responsesAPI][5] ResponsesParser with tools for full MCP python loop #29798
Changes from all commits: `7dc5727`, `c707a49`, `d136ff7`, `6cf0d2a`, `1cee382`, `da0105e`, `8984744`
```diff
@@ -9,10 +9,16 @@
 from contextlib import AsyncExitStack
 from typing import TYPE_CHECKING, Union

+from openai.types.responses.response_function_tool_call_output_item import (
+    ResponseFunctionToolCallOutputItem,
+)
+from openai.types.responses.tool import Mcp
 from openai_harmony import Author, Message, Role, StreamState, TextContent

 from vllm import envs
+from vllm.entrypoints.chat_utils import (
+    ChatTemplateContentFormatOption,
+)
 from vllm.entrypoints.harmony_utils import (
     get_encoding,
     get_streamable_parser_for_assistant,
```
```diff
@@ -22,16 +28,20 @@
     get_responses_parser_for_simple_context,
 )
 from vllm.entrypoints.openai.protocol import (
+    FunctionCall,
     ResponseInputOutputItem,
     ResponseRawMessageAndToken,
     ResponsesRequest,
 )
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.entrypoints.responses_utils import construct_tool_dicts
 from vllm.entrypoints.tool import Tool
 from vllm.entrypoints.tool_server import ToolServer
 from vllm.outputs import RequestOutput
 from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
+from vllm.tokenizers.protocol import TokenizerLike
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import random_uuid

 if TYPE_CHECKING:
     from mcp.client import ClientSession
```
```diff
@@ -221,6 +231,10 @@ def __init__(
         tokenizer: AnyTokenizer,
         reasoning_parser_cls: Callable[[AnyTokenizer], ReasoningParser] | None,
         request: ResponsesRequest,
+        available_tools: list[str] | None,
+        tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None,
+        chat_template: str | None,
+        chat_template_content_format: ChatTemplateContentFormatOption,
     ):
         self.num_prompt_tokens = 0
         self.num_output_tokens = 0
```
```diff
@@ -238,12 +252,19 @@ def __init__(
             reasoning_parser_cls=reasoning_parser_cls,
             response_messages=response_messages,
             request=request,
+            tool_parser_cls=tool_parser_cls,
         )
+        self.tool_parser_cls = tool_parser_cls
         self.request = request
         self.tokenizer = tokenizer

+        self.available_tools = available_tools or []
+        self._tool_sessions: dict[str, ClientSession | Tool] = {}
+        self.called_tools: set[str] = set()
+
+        self.tool_dicts = construct_tool_dicts(request.tools, request.tool_choice)
+        self.chat_template = chat_template
+        self.chat_template_content_format = chat_template_content_format

     def append_output(self, output: RequestOutput) -> None:
         self.num_prompt_tokens = len(output.prompt_token_ids or [])
```
```diff
@@ -252,14 +273,50 @@ def append_output(self, output: RequestOutput) -> None:
         self.parser.process(output.outputs[0])

     def append_tool_output(self, output: list[ResponseInputOutputItem]) -> None:
-        raise NotImplementedError("Should not be called.")
+        self.parser.response_messages.extend(output)

+    def need_builtin_tool_call(self) -> bool:
+        """Return true if the last message is a MCP tool call"""
+        last_message = self.parser.response_messages[-1]
+        # TODO: figure out which tools are MCP tools
+        if (  # noqa: SIM103
+            last_message.type == "function_call"
+            and last_message.name in ("code_interpreter", "python")
+        ):
+            return True
+
+        return False
```
**Comment on lines 278 to 288**

**Collaborator:** This format is quite bad, lol. Let's directly check the condition. Also, should we hardcode `"code_interpreter"` and `"python"` here? I remember @alecsolder made the changes to centralize all tools to go through the MCP tool type.

**Contributor (author):** I was thinking of cleaning up the code in #29989, which will include the browser & container tools, if that's okay. This PR is just to complete the ability to call only the python tool, lol.
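For reference, a minimal sketch of the reviewer's suggestion: return the boolean expression directly rather than going through `if ... return True / return False`. The hardcoded tool names are kept as-is pending the cleanup planned in #29989; this is a drop-in sketch, not code from the PR.

```python
# Sketch of the suggested simplification (not the merged code): the
# condition itself is the return value, so no if/else is needed.
def need_builtin_tool_call(self) -> bool:
    """Return True if the last message is an MCP tool call."""
    last_message = self.parser.response_messages[-1]
    # Hardcoded names kept for now; the follow-up (#29989) is expected to
    # centralize tool identification through the MCP tool type instead.
    return (
        last_message.type == "function_call"
        and last_message.name in ("code_interpreter", "python")
    )
```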
```diff
+    async def call_python_tool(
+        self, tool_session: Union["ClientSession", Tool], last_msg: FunctionCall
+    ) -> list[ResponseInputOutputItem]:
+        self.called_tools.add("python")
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result_parsable_context(self)
+        args = json.loads(last_msg.arguments)
+        param = {
+            "code": args["code"],
+        }
+        result = await tool_session.call_tool("python", param)
+        result_str = result.content[0].text
+
+        message = ResponseFunctionToolCallOutputItem(
+            id=f"fco_{random_uuid()}",
+            type="function_call_output",
+            call_id=f"call_{random_uuid()}",
+            output=result_str,
+            status="completed",
+        )
+
+        return [message]
+
     async def call_tool(self) -> list[ResponseInputOutputItem]:
-        raise NotImplementedError("Should not be called.")
+        if not self.parser.response_messages:
+            return []
+        last_msg = self.parser.response_messages[-1]
+        if last_msg.name == "code_interpreter":
+            return await self.call_python_tool(self._tool_sessions["python"], last_msg)
+        return []

     def render_for_completion(self):
         raise NotImplementedError("Should not be called.")
```
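Taken together, `need_builtin_tool_call`, `call_tool`, and `append_tool_output` are the pieces of the "full MCP python loop" named in the PR title. A rough sketch of how a driver could chain them; `generate_once` and `max_tool_turns` are hypothetical stand-ins for the serving layer, not part of this diff:

```python
# Hypothetical driver; generate_once and max_tool_turns are illustrative only.
async def run_python_tool_loop(context, generate_once, max_tool_turns: int = 8):
    context.append_output(await generate_once(context))  # first model turn
    for _ in range(max_tool_turns):
        if not context.need_builtin_tool_call():
            break  # last message is not an MCP tool call, so the loop is done
        tool_output = await context.call_tool()      # run the python tool
        context.append_tool_output(tool_output)      # feed results to the parser
        context.append_output(await generate_once(context))  # next model turn
```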
```diff
@@ -271,11 +328,38 @@ async def init_tool_sessions(
         request_id: str,
         mcp_tools: dict[str, Mcp],
     ):
-        pass
+        if tool_server:
+            for tool_name in self.available_tools:
+                if tool_name in self._tool_sessions:
+                    continue
+
+                tool_type = _map_tool_name_to_tool_type(tool_name)
+                headers = (
+                    mcp_tools[tool_type].headers if tool_type in mcp_tools else None
+                )
+                tool_session = await exit_stack.enter_async_context(
+                    tool_server.new_session(tool_name, request_id, headers)
+                )
+                self._tool_sessions[tool_name] = tool_session
+            exit_stack.push_async_exit(self.cleanup_session)

     async def cleanup_session(self, *args, **kwargs) -> None:
         """Can be used as coro to used in __aexit__"""
-        raise NotImplementedError("Should not be called.")
+
+        async def cleanup_tool_session(tool_session):
+            if not isinstance(tool_session, Tool):
+                logger.info(
+                    "Cleaning up tool session for %s", tool_session._client_info
+                )
+                with contextlib.suppress(Exception):
+                    await tool_session.call_tool("cleanup_session", {})
+
+        await asyncio.gather(
+            *(
+                cleanup_tool_session(self._tool_sessions[tool])
+                for tool in self.called_tools
+            )
+        )


 class HarmonyContext(ConversationContext):
```
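The session lifecycle here leans on `contextlib.AsyncExitStack`: each tool session is entered on the caller's stack, and `cleanup_session` is pushed as an async exit callback so that, on teardown, the `cleanup_session` MCP tool runs for every tool that was actually called before the sessions themselves close. A minimal sketch of the calling pattern, where `make_context` and the other parameter names are assumed stand-ins rather than vLLM APIs:

```python
import contextlib

# Assumed stand-ins: make_context, tool_server, request_id, mcp_tools.
async def serve_request(make_context, tool_server, request_id, mcp_tools):
    async with contextlib.AsyncExitStack() as exit_stack:
        context = make_context()
        # Enters one session per available tool on exit_stack and registers
        # context.cleanup_session to run when the stack unwinds.
        await context.init_tool_sessions(
            tool_server, exit_stack, request_id, mcp_tools
        )
        ...  # drive the generation/tool loop here
    # Unwind is LIFO: cleanup_session fires first, invoking the
    # "cleanup_session" tool for each entry in context.called_tools,
    # then each tool session's own __aexit__ closes the connection.
```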
**Reviewer:** Oh, why was this stubbed out before? Did it have issues with CI?

**Contributor (author):** I left it there in a previous PR because we didn't have tool calling yet, so it wasn't necessary at the time. There weren't any CI issues.