Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,3 +635,143 @@ def test_redacted_thinking_block_is_accepted(self):
# Redacted thinking is ignored, normal thinking still becomes reasoning.
assert asst.get("reasoning") == "Thinking..."
assert asst.get("content") == "Hi!"


class TestInlineSystemMessageInMessagesArray:
    """Cover ``role: system`` entries placed inside the ``messages`` array.

    Some clients put system messages in ``messages`` rather than in the
    Anthropic-standard top-level ``system`` field. These tests check that
    such entries are accepted and folded, together with any top-level
    ``system`` prompt, into the first converted message.
    """

    def test_inline_system_merged_with_top_level_system(self):
        """Full integration: inline system + top-level system + user message."""
        reminder_text = "<system-reminder>\n.....\n</system-reminder>\n\n"
        intro_text = "You are Claude Code, Anthropic's official CLI for Claude."
        ephemeral = {"type": "ephemeral"}

        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": reminder_text},
                    {
                        "type": "text",
                        "text": "help?",
                        "cache_control": dict(ephemeral),
                    },
                ],
            },
            {"role": "system", "content": "....."},
        ]
        top_level_system = [
            {
                "type": "text",
                "text": "x-anthropic-billing-header: "
                "cc_version=2.1.160.bca; cc_entrypoint=cli; cch=d1d48;",
            },
            {
                "type": "text",
                "text": intro_text,
                "cache_control": dict(ephemeral),
            },
            {
                "type": "text",
                "text": "....",
                "cache_control": dict(ephemeral),
            },
        ]
        request = _make_request(conversation, system=top_level_system, tools=[])

        result = _convert(request)

        # The merged system prompt comes first: the billing header block is
        # dropped and the inline system text follows the top-level blocks.
        system_msg = result.messages[0]
        assert system_msg["role"] == "system"
        assert system_msg["content"] == intro_text + "...." + "....."

        # The user message follows; its two text blocks are compared against
        # plain type/text dicts (no ``cache_control`` key).
        user_msg = result.messages[1]
        assert user_msg["role"] == "user"
        user_content = user_msg["content"]
        assert len(user_content) == 2
        assert user_content[0] == {"type": "text", "text": reminder_text}
        assert user_content[1] == {"type": "text", "text": "help?"}

    def test_inline_system_string_only(self):
        """Only an inline system string, no top-level system."""
        conversation = [
            {"role": "user", "content": "Hello"},
            {"role": "system", "content": "Be concise."},
        ]
        result = _convert(_make_request(conversation))

        system_msg = result.messages[0]
        assert system_msg["role"] == "system"
        assert system_msg["content"] == "Be concise."
        assert result.messages[1]["role"] == "user"

    def test_inline_system_list_content(self):
        """Inline system with list content blocks."""
        inline_blocks = [
            {"type": "text", "text": "Part one. "},
            {"type": "text", "text": "Part two."},
        ]
        conversation = [
            {"role": "user", "content": "Hi"},
            {"role": "system", "content": inline_blocks},
        ]
        result = _convert(_make_request(conversation))

        # The text blocks are concatenated with no separator.
        system_msg = result.messages[0]
        assert system_msg["role"] == "system"
        assert system_msg["content"] == "Part one. Part two."

    def test_multiple_inline_system_messages(self):
        """Multiple inline system messages should all be merged."""
        conversation = [
            {"role": "system", "content": "First system."},
            {"role": "user", "content": "Hello"},
            {"role": "system", "content": "Second system."},
        ]
        result = _convert(_make_request(conversation))

        # Both inline entries end up in the first message, in array order.
        system_msg = result.messages[0]
        assert system_msg["role"] == "system"
        assert system_msg["content"] == "First system.Second system."
        assert result.messages[1]["role"] == "user"

    def test_inline_system_with_top_level_string(self):
        """Top-level system is a string, inline system is also present."""
        conversation = [
            {"role": "user", "content": "Hello"},
            {"role": "system", "content": "Inline hint."},
        ]
        request = _make_request(conversation, system="Top-level prompt.")
        result = _convert(request)

        # The top-level prompt precedes the inline text in the merged content.
        system_msg = result.messages[0]
        assert system_msg["role"] == "system"
        assert system_msg["content"] == "Top-level prompt.Inline hint."
        assert result.messages[1]["role"] == "user"
2 changes: 1 addition & 1 deletion vllm/entrypoints/anthropic/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class AnthropicContentBlock(BaseModel):
class AnthropicMessage(BaseModel):
"""Message structure"""

# NOTE(review): this hunk is reported as "1 addition & 1 deletion", so the two
# `role` lines below are presumably the removed and the added version of the
# same field rather than two separate fields.
# Removed version: only "user" and "assistant" are allowed as roles.
role: Literal["user", "assistant"]
# Added version: "system" is allowed too, so a message with `role: system`
# inside the `messages` array passes validation.
role: Literal["user", "assistant", "system"]
# Either a plain string or a list of content blocks.
content: str | list[AnthropicContentBlock]


Expand Down
48 changes: 32 additions & 16 deletions vllm/entrypoints/anthropic/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,30 +143,46 @@ def _convert_system_message(
openai_messages: list[dict[str, Any]],
) -> None:
"""Convert Anthropic system message to OpenAI format"""
if not anthropic_request.system:
return
system_parts: list[str] = []

if isinstance(anthropic_request.system, str):
openai_messages.append(
{"role": "system", "content": anthropic_request.system}
)
else:
system_prompt = ""
for block in anthropic_request.system:
if block.type == "text" and block.text:
# Strip Claude Code's attribution header which contains
# a per-request hash that defeats prefix caching.
if block.text.startswith("x-anthropic-billing-header"):
continue
system_prompt += block.text
openai_messages.append({"role": "system", "content": system_prompt})
# Top-level system field
if anthropic_request.system:
if isinstance(anthropic_request.system, str):
system_parts.append(anthropic_request.system)
else:
for block in anthropic_request.system:
if block.type == "text" and block.text:
# Strip Claude Code's attribution header which contains
# a per-request hash that defeats prefix caching.
if block.text.startswith("x-anthropic-billing-header"):
continue
system_parts.append(block.text)

# System messages embedded inside the messages array
for msg in anthropic_request.messages:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chaunceyjiang @aleksandaryanakiev @sfeng33 @andrew @potatosalad I'm a bit concerned about the system-role fix. Merging a mid-conversation `role: system` message into the single leading system message could cause issues with KV-cache hits: in multi-turn conversations it changes the prompt prefix, which would likely hurt cache reuse.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have also observed this issue. The fix here is not correct. I am trying a new solution.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chaunceyjiang
OK, I also have an idea for this. I will prepare a pull request later so you can check whether it meets your requirements.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if msg.role != "system":
continue
if isinstance(msg.content, str):
system_parts.append(msg.content)
else:
for block in msg.content:
if block.type == "text" and block.text:
if block.text.startswith("x-anthropic-billing-header"):
continue
system_parts.append(block.text)

if system_parts:
openai_messages.append({"role": "system", "content": "".join(system_parts)})

@classmethod
def _convert_messages(
cls, messages: list, openai_messages: list[dict[str, Any]]
) -> None:
"""Convert Anthropic messages to OpenAI format"""
for msg in messages:
if msg.role == "system":
continue

openai_msg: dict[str, Any] = {"role": msg.role} # type: ignore

if isinstance(msg.content, str):
Expand Down
Loading