Skip to content

Commit 1d168ce

Browse files
daniel-salib and charlotte12l
authored and committed
[Frontend] refactor harmony utils output message parsing (vllm-project#29820)
Signed-off-by: Daniel Salib <[email protected]> Signed-off-by: Xingyu Liu <[email protected]>
1 parent 4c309fc commit 1d168ce

File tree

1 file changed

+117
-99
lines changed

1 file changed

+117
-99
lines changed

vllm/entrypoints/harmony_utils.py

Lines changed: 117 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,105 @@ def render_for_completion(messages: list[Message]) -> list[int]:
328328
return token_ids
329329

330330

331+
def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutputItem:
332+
"""Parse browser tool calls (search, open, find) into web search items."""
333+
if len(message.content) != 1:
334+
raise ValueError("Invalid number of contents in browser message")
335+
content = message.content[0]
336+
337+
# Parse JSON args (with retry detection)
338+
try:
339+
browser_call = json.loads(content.text)
340+
except json.JSONDecodeError:
341+
json_retry_output_message = (
342+
f"Invalid JSON args, caught and retried: {content.text}"
343+
)
344+
browser_call = {
345+
"query": json_retry_output_message,
346+
"url": json_retry_output_message,
347+
"pattern": json_retry_output_message,
348+
}
349+
350+
# Create appropriate action based on recipient
351+
if recipient == "browser.search":
352+
action = ActionSearch(
353+
query=f"cursor:{browser_call.get('query', '')}", type="search"
354+
)
355+
elif recipient == "browser.open":
356+
action = ActionOpenPage(
357+
url=f"cursor:{browser_call.get('url', '')}", type="open_page"
358+
)
359+
elif recipient == "browser.find":
360+
action = ActionFind(
361+
pattern=browser_call.get("pattern", ""),
362+
url=f"cursor:{browser_call.get('url', '')}",
363+
type="find",
364+
)
365+
else:
366+
raise ValueError(f"Unknown browser action: {recipient}")
367+
368+
return ResponseFunctionWebSearch(
369+
id=f"ws_{random_uuid()}",
370+
action=action,
371+
status="completed",
372+
type="web_search_call",
373+
)
374+
375+
376+
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
377+
"""Parse function calls into function tool call items."""
378+
function_name = recipient.split(".")[-1]
379+
output_items = []
380+
for content in message.content:
381+
random_id = random_uuid()
382+
response_item = ResponseFunctionToolCall(
383+
arguments=content.text,
384+
call_id=f"call_{random_id}",
385+
type="function_call",
386+
name=function_name,
387+
id=f"fc_{random_id}",
388+
)
389+
output_items.append(response_item)
390+
return output_items
391+
392+
393+
def _parse_reasoning_content(message: Message) -> list[ResponseOutputItem]:
394+
"""Parse reasoning/analysis content into reasoning items."""
395+
output_items = []
396+
for content in message.content:
397+
reasoning_item = ResponseReasoningItem(
398+
id=f"rs_{random_uuid()}",
399+
summary=[],
400+
type="reasoning",
401+
content=[
402+
ResponseReasoningTextContent(text=content.text, type="reasoning_text")
403+
],
404+
status=None,
405+
)
406+
output_items.append(reasoning_item)
407+
return output_items
408+
409+
410+
def _parse_final_message(message: Message) -> ResponseOutputItem:
    """Assemble a completed output message from a final-channel message.

    Every content entry becomes one ``output_text`` part. The resulting
    message takes its role from ``message.author.role`` and gets a fresh
    ``msg_``-prefixed id.
    """
    text_parts = [
        ResponseOutputText(
            text=part.text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        for part in message.content
    ]
    return ResponseOutputMessage(
        id=f"msg_{random_uuid()}",
        content=text_parts,
        role=message.author.role,
        status="completed",
        type="message",
    )
428+
429+
331430
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
332431
"""
333432
Parse a Harmony message into a list of output response items.
@@ -340,119 +439,38 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
340439

341440
output_items: list[ResponseOutputItem] = []
342441
recipient = message.recipient
442+
443+
# Browser tool calls
343444
if recipient is not None and recipient.startswith("browser."):
344-
if len(message.content) != 1:
345-
raise ValueError("Invalid number of contents in browser message")
346-
content = message.content[0]
347-
# We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY
348-
# env variable since if it is not set, we are certain the json is valid
349-
# The use of Actions for web search will be removed entirely in
350-
# the future, so this is only necessary temporarily
351-
try:
352-
browser_call = json.loads(content.text)
353-
except json.JSONDecodeError:
354-
# If the content is not valid JSON, then it was
355-
# caught and retried by vLLM, which means we
356-
# need to make note of that so the user is aware
357-
json_retry_output_message = (
358-
f"Invalid JSON args, caught and retried: {content.text}"
359-
)
360-
browser_call = {
361-
"query": json_retry_output_message,
362-
"url": json_retry_output_message,
363-
"pattern": json_retry_output_message,
364-
}
365-
# TODO: translate to url properly!
366-
if recipient == "browser.search":
367-
action = ActionSearch(
368-
query=f"cursor:{browser_call.get('query', '')}", type="search"
369-
)
370-
elif recipient == "browser.open":
371-
action = ActionOpenPage(
372-
url=f"cursor:{browser_call.get('url', '')}", type="open_page"
373-
)
374-
elif recipient == "browser.find":
375-
action = ActionFind(
376-
pattern=browser_call["pattern"],
377-
url=f"cursor:{browser_call.get('url', '')}",
378-
type="find",
379-
)
380-
else:
381-
raise ValueError(f"Unknown browser action: {recipient}")
382-
web_search_item = ResponseFunctionWebSearch(
383-
id=f"ws_{random_uuid()}",
384-
action=action,
385-
status="completed",
386-
type="web_search_call",
387-
)
388-
output_items.append(web_search_item)
445+
output_items.append(_parse_browser_tool_call(message, recipient))
446+
447+
# Analysis channel (reasoning/chain-of-thought)
389448
elif message.channel == "analysis":
390-
for content in message.content:
391-
reasoning_item = ResponseReasoningItem(
392-
id=f"rs_{random_uuid()}",
393-
summary=[],
394-
type="reasoning",
395-
content=[
396-
ResponseReasoningTextContent(
397-
text=content.text, type="reasoning_text"
398-
)
399-
],
400-
status=None,
401-
)
402-
output_items.append(reasoning_item)
449+
output_items.extend(_parse_reasoning_content(message))
450+
451+
# Commentary channel
403452
elif message.channel == "commentary":
453+
# Function calls
404454
if recipient is not None and recipient.startswith("functions."):
405-
function_name = recipient.split(".")[-1]
406-
for content in message.content:
407-
random_id = random_uuid()
408-
response_item = ResponseFunctionToolCall(
409-
arguments=content.text,
410-
call_id=f"call_{random_id}",
411-
type="function_call",
412-
name=function_name,
413-
id=f"fc_{random_id}",
414-
)
415-
output_items.append(response_item)
455+
output_items.extend(_parse_function_call(message, recipient))
456+
457+
# Built-in tools on commentary channel are treated as reasoning for now
416458
elif recipient is not None and (
417459
recipient.startswith("python")
418460
or recipient.startswith("browser")
419461
or recipient.startswith("container")
420462
):
421-
for content in message.content:
422-
reasoning_item = ResponseReasoningItem(
423-
id=f"rs_{random_uuid()}",
424-
summary=[],
425-
type="reasoning",
426-
content=[
427-
ResponseReasoningTextContent(
428-
text=content.text, type="reasoning_text"
429-
)
430-
],
431-
status=None,
432-
)
433-
output_items.append(reasoning_item)
463+
output_items.extend(_parse_reasoning_content(message))
434464
else:
435465
raise ValueError(f"Unknown recipient: {recipient}")
466+
467+
# Final output message
436468
elif message.channel == "final":
437-
contents = []
438-
for content in message.content:
439-
output_text = ResponseOutputText(
440-
text=content.text,
441-
annotations=[], # TODO
442-
type="output_text",
443-
logprobs=None, # TODO
444-
)
445-
contents.append(output_text)
446-
text_item = ResponseOutputMessage(
447-
id=f"msg_{random_uuid()}",
448-
content=contents,
449-
role=message.author.role,
450-
status="completed",
451-
type="message",
452-
)
453-
output_items.append(text_item)
469+
output_items.append(_parse_final_message(message))
470+
454471
else:
455472
raise ValueError(f"Unknown channel: {message.channel}")
473+
456474
return output_items
457475

458476

0 commit comments

Comments
 (0)