diff --git a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
index eb9798980f06..ad9fed1d355a 100644
--- a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
+++ b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
@@ -635,3 +635,143 @@ def test_redacted_thinking_block_is_accepted(self):
# Redacted thinking is ignored, normal thinking still becomes reasoning.
assert asst.get("reasoning") == "Thinking..."
assert asst.get("content") == "Hi!"
+
+
+class TestInlineSystemMessageInMessagesArray:
+    """Verify that ``role: system`` messages embedded inside the ``messages``
+    array are hoisted out of the conversation and merged, after the
+    top-level ``system`` prompt, into a single leading system message.
+
+    This handles clients that place system messages inside the messages array
+    instead of the Anthropic-standard top-level ``system`` field.
+    """
+
+    def test_inline_system_merged_with_top_level_system(self):
+        """Top-level system blocks plus an inline system message are merged."""
+        request = _make_request(
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "\n.....\n\n\n",
+                        },
+                        {
+                            "type": "text",
+                            "text": "help?",
+                            "cache_control": {"type": "ephemeral"},
+                        },
+                    ],
+                },
+                {
+                    "role": "system",
+                    "content": ".....",
+                },
+            ],
+            system=[
+                {
+                    "type": "text",
+                    "text": "x-anthropic-billing-header: "
+                    "cc_version=2.1.160.bca; cc_entrypoint=cli; cch=d1d48;",
+                },
+                {
+                    "type": "text",
+                    "text": "You are Claude Code, Anthropic's official CLI for Claude.",
+                    "cache_control": {"type": "ephemeral"},
+                },
+                {
+                    "type": "text",
+                    "text": "....",
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+            tools=[],
+        )
+
+        result = _convert(request)
+
+        # Exactly one system message, then the user turn: the inline system
+        # message must not also survive as a conversation turn.
+        assert [m["role"] for m in result.messages] == ["system", "user"]
+        # Billing header stripped, inline system appended.
+        assert (
+            result.messages[0]["content"]
+            == "You are Claude Code, Anthropic's official CLI for Claude."
+            "...."
+            "....."
+        )
+
+        # The user message content is preserved, minus ``cache_control``.
+        user_content = result.messages[1]["content"]
+        assert len(user_content) == 2
+        assert user_content[0] == {
+            "type": "text",
+            "text": "\n.....\n\n\n",
+        }
+        assert user_content[1] == {
+            "type": "text",
+            "text": "help?",
+        }
+
+    def test_inline_system_string_only(self):
+        """Only an inline system string, no top-level system."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Be concise."},
+            ]
+        )
+        result = _convert(request)
+
+        assert [m["role"] for m in result.messages] == ["system", "user"]
+        assert result.messages[0]["content"] == "Be concise."
+
+    def test_inline_system_list_content(self):
+        """Inline system with list content blocks."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hi"},
+                {
+                    "role": "system",
+                    "content": [
+                        {"type": "text", "text": "Part one. "},
+                        {"type": "text", "text": "Part two."},
+                    ],
+                },
+            ]
+        )
+        result = _convert(request)
+
+        assert [m["role"] for m in result.messages] == ["system", "user"]
+        assert result.messages[0]["content"] == "Part one. Part two."
+
+    def test_multiple_inline_system_messages(self):
+        """Multiple inline system messages should all be merged."""
+        request = _make_request(
+            [
+                {"role": "system", "content": "First system."},
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Second system."},
+            ]
+        )
+        result = _convert(request)
+
+        # Merged in array order, regardless of position among the turns.
+        assert [m["role"] for m in result.messages] == ["system", "user"]
+        assert result.messages[0]["content"] == "First system.Second system."
+
+    def test_inline_system_with_top_level_string(self):
+        """Top-level system is a string, inline system is also present."""
+        request = _make_request(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "system", "content": "Inline hint."},
+            ],
+            system="Top-level prompt.",
+        )
+        result = _convert(request)
+
+        # Top-level prompt comes first; parts are joined without a separator.
+        assert [m["role"] for m in result.messages] == ["system", "user"]
+        assert result.messages[0]["content"] == "Top-level prompt.Inline hint."
diff --git a/vllm/entrypoints/anthropic/protocol.py b/vllm/entrypoints/anthropic/protocol.py
index 3ebc171173e9..279f36253455 100644
--- a/vllm/entrypoints/anthropic/protocol.py
+++ b/vllm/entrypoints/anthropic/protocol.py
@@ -65,7 +65,7 @@ class AnthropicContentBlock(BaseModel):
class AnthropicMessage(BaseModel):
"""Message structure"""
- role: Literal["user", "assistant"]
+ role: Literal["user", "assistant", "system"]
content: str | list[AnthropicContentBlock]
diff --git a/vllm/entrypoints/anthropic/serving.py b/vllm/entrypoints/anthropic/serving.py
index 915cee59f981..2bdec6f4ec33 100644
--- a/vllm/entrypoints/anthropic/serving.py
+++ b/vllm/entrypoints/anthropic/serving.py
@@ -143,23 +143,36 @@ def _convert_system_message(
openai_messages: list[dict[str, Any]],
) -> None:
"""Convert Anthropic system message to OpenAI format"""
- if not anthropic_request.system:
- return
+ system_parts: list[str] = []
- if isinstance(anthropic_request.system, str):
- openai_messages.append(
- {"role": "system", "content": anthropic_request.system}
- )
- else:
- system_prompt = ""
- for block in anthropic_request.system:
- if block.type == "text" and block.text:
- # Strip Claude Code's attribution header which contains
- # a per-request hash that defeats prefix caching.
- if block.text.startswith("x-anthropic-billing-header"):
- continue
- system_prompt += block.text
- openai_messages.append({"role": "system", "content": system_prompt})
+ # Top-level system field
+ if anthropic_request.system:
+ if isinstance(anthropic_request.system, str):
+ system_parts.append(anthropic_request.system)
+ else:
+ for block in anthropic_request.system:
+ if block.type == "text" and block.text:
+ # Strip Claude Code's attribution header which contains
+ # a per-request hash that defeats prefix caching.
+ if block.text.startswith("x-anthropic-billing-header"):
+ continue
+ system_parts.append(block.text)
+
+ # System messages embedded inside the messages array
+ for msg in anthropic_request.messages:
+ if msg.role != "system":
+ continue
+ if isinstance(msg.content, str):
+ system_parts.append(msg.content)
+ else:
+ for block in msg.content:
+ if block.type == "text" and block.text:
+ if block.text.startswith("x-anthropic-billing-header"):
+ continue
+ system_parts.append(block.text)
+
+ if system_parts:
+ openai_messages.append({"role": "system", "content": "".join(system_parts)})
@classmethod
def _convert_messages(
@@ -167,6 +180,9 @@ def _convert_messages(
) -> None:
"""Convert Anthropic messages to OpenAI format"""
for msg in messages:
+ if msg.role == "system":
+ continue
+
openai_msg: dict[str, Any] = {"role": msg.role} # type: ignore
if isinstance(msg.content, str):