diff --git a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
index eb9798980f06..ad9fed1d355a 100644
--- a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
+++ b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
@@ -635,3 +635,143 @@ def test_redacted_thinking_block_is_accepted(self):
         # Redacted thinking is ignored, normal thinking still becomes reasoning.
         assert asst.get("reasoning") == "Thinking..."
         assert asst.get("content") == "Hi!"
+
+
+class TestInlineSystemMessageInMessagesArray:
+    """Verify that ``role: system`` messages embedded inside the ``messages``
+    array are accepted and merged with the top-level ``system`` prompt.
+
+    This handles clients that place system messages inside the messages array
+    instead of the Anthropic-standard top-level ``system`` field.
+    """
+
+    def test_inline_system_merged_with_top_level_system(self):
+        """Full integration: inline system + top-level system + user message."""
+        request = _make_request(
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "\n.....\n\n\n",
+                        },
+                        {
+                            "type": "text",
+                            "text": "help?",
+                            "cache_control": {"type": "ephemeral"},
+                        },
+                    ],
+                },
+                {
+                    "role": "system",
+                    "content": ".....",
+                },
+            ],
+            system=[
+                {
+                    "type": "text",
+                    "text": "x-anthropic-billing-header: "
+                    "cc_version=2.1.160.bca; cc_entrypoint=cli; cch=d1d48;",
+                },
+                {
+                    "type": "text",
+                    "text": "You are Claude Code, Anthropic's official CLI for Claude.",
+                    "cache_control": {"type": "ephemeral"},
+                },
+                {
+                    "type": "text",
+                    "text": "....",
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+            tools=[],
+        )
+
+        result = _convert(request)
+
+        # First message should be the merged system prompt.
+        assert result.messages[0]["role"] == "system"
+        # Billing header stripped, inline system appended.
+        assert (
+            result.messages[0]["content"]
+            == "You are Claude Code, Anthropic's official CLI for Claude."
+            "...."
+            "....."
+        )
+
+        # Second message should be the user message, content preserved.
+        assert result.messages[1]["role"] == "user"
+        user_content = result.messages[1]["content"]
+        assert len(user_content) == 2
+        assert user_content[0] == {
+            "type": "text",
+            "text": "\n.....\n\n\n",
+        }
+        assert user_content[1] == {
+            "type": "text",
+            "text": "help?",
+        }
+
+    def test_inline_system_string_only(self):
+        """Only an inline system string, no top-level system."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Be concise."},
+            ]
+        )
+        result = _convert(request)
+
+        assert result.messages[0]["role"] == "system"
+        assert result.messages[0]["content"] == "Be concise."
+        assert result.messages[1]["role"] == "user"
+
+    def test_inline_system_list_content(self):
+        """Inline system with list content blocks."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hi"},
+                {
+                    "role": "system",
+                    "content": [
+                        {"type": "text", "text": "Part one. "},
+                        {"type": "text", "text": "Part two."},
+                    ],
+                },
+            ]
+        )
+        result = _convert(request)
+
+        assert result.messages[0]["role"] == "system"
+        assert result.messages[0]["content"] == "Part one. Part two."
+
+    def test_multiple_inline_system_messages(self):
+        """Multiple inline system messages should all be merged."""
+        request = _make_request(
+            [
+                {"role": "system", "content": "First system."},
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Second system."},
+            ]
+        )
+        result = _convert(request)
+
+        assert result.messages[0]["role"] == "system"
+        assert result.messages[0]["content"] == "First system.Second system."
+        assert result.messages[1]["role"] == "user"
+
+    def test_inline_system_with_top_level_string(self):
+        """Top-level system is a string, inline system is also present."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Inline hint."},
+            ],
+            system="Top-level prompt.",
+        )
+        result = _convert(request)
+
+        assert result.messages[0]["role"] == "system"
+        assert result.messages[0]["content"] == "Top-level prompt.Inline hint."
+        assert result.messages[1]["role"] == "user"
diff --git a/vllm/entrypoints/anthropic/protocol.py b/vllm/entrypoints/anthropic/protocol.py
index 3ebc171173e9..279f36253455 100644
--- a/vllm/entrypoints/anthropic/protocol.py
+++ b/vllm/entrypoints/anthropic/protocol.py
@@ -65,7 +65,7 @@ class AnthropicContentBlock(BaseModel):
 class AnthropicMessage(BaseModel):
     """Message structure"""
 
-    role: Literal["user", "assistant"]
+    role: Literal["user", "assistant", "system"]
     content: str | list[AnthropicContentBlock]
 
 
diff --git a/vllm/entrypoints/anthropic/serving.py b/vllm/entrypoints/anthropic/serving.py
index 915cee59f981..2bdec6f4ec33 100644
--- a/vllm/entrypoints/anthropic/serving.py
+++ b/vllm/entrypoints/anthropic/serving.py
@@ -143,23 +143,36 @@ def _convert_system_message(
         openai_messages: list[dict[str, Any]],
     ) -> None:
         """Convert Anthropic system message to OpenAI format"""
-        if not anthropic_request.system:
-            return
+        system_parts: list[str] = []
 
-        if isinstance(anthropic_request.system, str):
-            openai_messages.append(
-                {"role": "system", "content": anthropic_request.system}
-            )
-        else:
-            system_prompt = ""
-            for block in anthropic_request.system:
-                if block.type == "text" and block.text:
-                    # Strip Claude Code's attribution header which contains
-                    # a per-request hash that defeats prefix caching.
-                    if block.text.startswith("x-anthropic-billing-header"):
-                        continue
-                    system_prompt += block.text
-            openai_messages.append({"role": "system", "content": system_prompt})
+        # Top-level system field
+        if anthropic_request.system:
+            if isinstance(anthropic_request.system, str):
+                system_parts.append(anthropic_request.system)
+            else:
+                for block in anthropic_request.system:
+                    if block.type == "text" and block.text:
+                        # Strip Claude Code's attribution header which contains
+                        # a per-request hash that defeats prefix caching.
+                        if block.text.startswith("x-anthropic-billing-header"):
+                            continue
+                        system_parts.append(block.text)
+
+        # System messages embedded inside the messages array
+        for msg in anthropic_request.messages:
+            if msg.role != "system":
+                continue
+            if isinstance(msg.content, str):
+                system_parts.append(msg.content)
+            else:
+                for block in msg.content:
+                    if block.type == "text" and block.text:
+                        if block.text.startswith("x-anthropic-billing-header"):
+                            continue
+                        system_parts.append(block.text)
+
+        if system_parts:
+            openai_messages.append({"role": "system", "content": "".join(system_parts)})
 
     @classmethod
     def _convert_messages(
@@ -167,6 +180,9 @@ def _convert_messages(
     ) -> None:
         """Convert Anthropic messages to OpenAI format"""
         for msg in messages:
+            if msg.role == "system":
+                continue
+
             openai_msg: dict[str, Any] = {"role": msg.role}  # type: ignore
 
             if isinstance(msg.content, str):