Skip to content

Commit dd23bae

Browse files
committed
fix: dedent structural tag block and preserve response format in system-instructions mode
Fix two bugs identified in PR review:

1. The structural tag setup block was nested inside the `else` branch of `if self.use_harmony:`, making it unreachable for GPT-OSS (the primary target). Dedent the block so it runs unconditionally after context selection.

2. The `# Response Formats` schema section was lost when VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS was enabled, because get_developer_message() dropped all instructions in that mode. Add a separate response_format_section parameter so the schema is always included in the developer message regardless of the system-instructions flag.

Signed-off-by: Will Deines <will@garr.io>
1 parent 4fa603e commit dd23bae

3 files changed

Lines changed: 136 additions & 56 deletions

File tree

tests/entrypoints/openai/parser/test_harmony_utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
from unittest.mock import patch
5+
46
import pytest
57
from openai_harmony import Message, Role
68

79
from tests.entrypoints.openai.utils import verify_harmony_messages
810
from vllm.entrypoints.openai.parser.harmony_utils import (
911
auto_drop_analysis_messages,
12+
get_developer_message,
1013
get_encoding,
1114
get_system_message,
1215
has_custom_tools,
@@ -955,3 +958,73 @@ def test_compact_json_no_spaces(self):
955958
def test_section_separated_by_blank_lines(self):
956959
result = inject_response_formats("Instructions here.", {"type": "object"})
957960
assert "\n\n# Response Formats\n\n## structured_output\n\n" in result
961+
962+
963+
class TestGetDeveloperMessageResponseFormats:
964+
"""Tests for response_format_section parameter in get_developer_message."""
965+
966+
ENV_VAR = (
967+
"vllm.entrypoints.openai.parser.harmony_utils"
968+
".envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS"
969+
)
970+
971+
def _extract_instructions_text(self, dev_msg: Message) -> str | None:
972+
"""Extract the raw text from a developer message's instructions."""
973+
for content_item in dev_msg.content:
974+
instructions = getattr(content_item, "instructions", None)
975+
if instructions is not None:
976+
return instructions
977+
return None
978+
979+
def test_response_format_preserved_with_system_instructions(self):
980+
"""When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is True,
981+
user instructions should be dropped but response format schema
982+
should still appear in the developer message."""
983+
schema_section = "# Response Formats\n\n## structured_output\n\n{}"
984+
with patch(self.ENV_VAR, True):
985+
dev_msg = get_developer_message(
986+
instructions="Be concise.",
987+
response_format_section=schema_section,
988+
)
989+
text = self._extract_instructions_text(dev_msg)
990+
assert text is not None
991+
assert "# Response Formats" in text
992+
# User instructions should NOT be present
993+
assert "Be concise." not in text
994+
995+
def test_response_format_and_instructions_without_system_instructions(self):
996+
"""When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is False,
997+
both instructions and response format schema should appear."""
998+
schema_section = "# Response Formats\n\n## structured_output\n\n{}"
999+
with patch(self.ENV_VAR, False):
1000+
dev_msg = get_developer_message(
1001+
instructions="Be concise.",
1002+
response_format_section=schema_section,
1003+
)
1004+
text = self._extract_instructions_text(dev_msg)
1005+
assert text is not None
1006+
assert "Be concise." in text
1007+
assert "# Response Formats" in text
1008+
1009+
def test_response_format_only_no_instructions(self):
1010+
"""With instructions=None, only the response format section appears."""
1011+
schema_section = "# Response Formats\n\n## structured_output\n\n{}"
1012+
with patch(self.ENV_VAR, False):
1013+
dev_msg = get_developer_message(
1014+
instructions=None,
1015+
response_format_section=schema_section,
1016+
)
1017+
text = self._extract_instructions_text(dev_msg)
1018+
assert text is not None
1019+
assert "# Response Formats" in text
1020+
1021+
def test_backward_compat_no_response_format(self):
1022+
"""Without response_format_section, behavior matches the original."""
1023+
with patch(self.ENV_VAR, False):
1024+
dev_msg = get_developer_message(
1025+
instructions="Be concise.",
1026+
)
1027+
text = self._extract_instructions_text(dev_msg)
1028+
assert text is not None
1029+
assert "Be concise." in text
1030+
assert "# Response Formats" not in text

vllm/entrypoints/openai/parser/harmony_utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,16 @@ def create_tool_definition(tool: ChatCompletionToolsParam | Tool):
122122
def get_developer_message(
123123
instructions: str | None = None,
124124
tools: list[Tool | ChatCompletionToolsParam] | None = None,
125+
response_format_section: str | None = None,
125126
) -> Message:
126127
dev_msg_content = DeveloperContent.new()
128+
parts: list[str] = []
127129
if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
128-
dev_msg_content = dev_msg_content.with_instructions(instructions)
130+
parts.append(instructions)
131+
if response_format_section is not None:
132+
parts.append(response_format_section)
133+
if parts:
134+
dev_msg_content = dev_msg_content.with_instructions("\n\n".join(parts))
129135
if tools is not None:
130136
function_tools: list[Tool | ChatCompletionToolsParam] = []
131137
for tool in tools:

vllm/entrypoints/openai/responses/serving.py

Lines changed: 56 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -519,61 +519,61 @@ async def create_responses(
519519
else:
520520
context = SimpleContext()
521521

522-
if self.parser and self.parser.reasoning_parser_cls is not None:
523-
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
524-
struct_out = sampling_params.structured_outputs
525-
526-
if isinstance(struct_out, StructuredOutputsParams):
527-
if struct_out.all_non_structural_tag_constraints_none():
528-
# No content constraint — just apply reasoning
529-
# channel tags
530-
sampling_params.structured_outputs = replace(
531-
struct_out,
532-
structural_tag=(
533-
reasoning_parser.prepare_structured_tag(
534-
struct_out.structural_tag,
535-
self.tool_server,
536-
)
537-
),
538-
)
539-
else:
540-
# Content constraint present (json, regex,
541-
# grammar, choice, json_object). Embed it in the
542-
# final channel tag within the structural tag.
543-
content_fmt = _constraint_to_content_format(struct_out)
544-
if content_fmt is not None:
545-
structural_tag = (
546-
reasoning_parser.prepare_structured_tag(
547-
None,
548-
self.tool_server,
549-
final_content_format=content_fmt,
550-
)
522+
if self.parser and self.parser.reasoning_parser_cls is not None:
523+
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
524+
struct_out = sampling_params.structured_outputs
525+
526+
if isinstance(struct_out, StructuredOutputsParams):
527+
if struct_out.all_non_structural_tag_constraints_none():
528+
# No content constraint — just apply reasoning
529+
# channel tags
530+
sampling_params.structured_outputs = replace(
531+
struct_out,
532+
structural_tag=(
533+
reasoning_parser.prepare_structured_tag(
534+
struct_out.structural_tag,
535+
self.tool_server,
551536
)
552-
if structural_tag is not None:
553-
# Clear content constraints, set
554-
# structural_tag, but preserve options
555-
# like disable_any_whitespace.
556-
sampling_params.structured_outputs = replace(
557-
struct_out,
558-
json=None,
559-
regex=None,
560-
choice=None,
561-
grammar=None,
562-
json_object=None,
563-
structural_tag=structural_tag,
564-
)
565-
elif struct_out is None:
566-
# No structured output requested, but still need
567-
# reasoning channel tags
568-
tag = reasoning_parser.prepare_structured_tag(
569-
None, self.tool_server
537+
),
570538
)
571-
if tag is not None:
572-
sampling_params.structured_outputs = (
573-
StructuredOutputsParams(
574-
structural_tag=tag # type: ignore[call-arg]
539+
else:
540+
# Content constraint present (json, regex,
541+
# grammar, choice, json_object). Embed it in the
542+
# final channel tag within the structural tag.
543+
content_fmt = _constraint_to_content_format(struct_out)
544+
if content_fmt is not None:
545+
structural_tag = (
546+
reasoning_parser.prepare_structured_tag(
547+
None,
548+
self.tool_server,
549+
final_content_format=content_fmt,
575550
)
576551
)
552+
if structural_tag is not None:
553+
# Clear content constraints, set
554+
# structural_tag, but preserve options
555+
# like disable_any_whitespace.
556+
sampling_params.structured_outputs = replace(
557+
struct_out,
558+
json=None,
559+
regex=None,
560+
choice=None,
561+
grammar=None,
562+
json_object=None,
563+
structural_tag=structural_tag,
564+
)
565+
elif struct_out is None:
566+
# No structured output requested, but still need
567+
# reasoning channel tags
568+
tag = reasoning_parser.prepare_structured_tag(
569+
None, self.tool_server
570+
)
571+
if tag is not None:
572+
sampling_params.structured_outputs = (
573+
StructuredOutputsParams(
574+
structural_tag=tag # type: ignore[call-arg]
575+
)
576+
)
577577
generator = self._generate_with_builtin_tools(
578578
request_id=request.request_id,
579579
engine_prompt=engine_prompt,
@@ -1242,14 +1242,15 @@ def _construct_input_messages_with_harmony(
12421242
needs_dev_msg = with_custom_tools or response_format_schema is not None
12431243

12441244
if needs_dev_msg:
1245-
dev_instructions = request.instructions
1245+
response_format_text = None
12461246
if response_format_schema is not None:
1247-
dev_instructions = inject_response_formats(
1248-
dev_instructions, response_format_schema
1247+
response_format_text = inject_response_formats(
1248+
None, response_format_schema
12491249
)
12501250
dev_msg = get_developer_message(
1251-
instructions=dev_instructions,
1251+
instructions=request.instructions,
12521252
tools=request.tools if with_custom_tools else None,
1253+
response_format_section=response_format_text,
12531254
)
12541255
messages.append(dev_msg)
12551256
messages += construct_harmony_previous_input_messages(request)

0 commit comments

Comments
 (0)