Commit 60a350e

Author: Andrew Xia
Commit message: clean

Signed-off-by: Andrew Xia <[email protected]>
1 parent 783b011 commit 60a350e

5 files changed (+15, -102 lines)

vllm/entrypoints/context.py

Lines changed: 0 additions & 1 deletion

@@ -243,7 +243,6 @@ def __init__(
         self.chat_template_content_format = chat_template_content_format
 
     def append_output(self, output: RequestOutput) -> None:
-        # TODO: output.prompt / output.prompt_token_id doesn't update correctly
         self.num_prompt_tokens = len(output.prompt_token_ids or [])
         self.num_cached_tokens = output.num_cached_tokens or 0
         self.num_output_tokens += len(output.outputs[0].token_ids or [])

vllm/entrypoints/openai/parser/responses_parser.py

Lines changed: 0 additions & 2 deletions

@@ -37,7 +37,6 @@ def __init__(
             response_messages
         )
         self.num_init_messages = len(response_messages)
-        self.tokens: list[int] = []
         self.tokenizer = tokenizer
         self.request = request
 
@@ -49,7 +48,6 @@ def process(self, output: CompletionOutput) -> "ResponsesParser":
             output.text, request=self.request
         )
         if reasoning_content:
-            # HACK
            self.response_messages.append(
                ResponseReasoningItem(
                    type="reasoning",

vllm/entrypoints/openai/protocol.py

Lines changed: 0 additions & 78 deletions

@@ -1779,84 +1779,6 @@ class ResponseInProgressEvent(OpenAIResponseInProgressEvent):
     )
 
 
-BatchRequestInputBody: TypeAlias = (
-    ChatCompletionRequest | EmbeddingRequest | ScoreRequest | RerankRequest
-)
-
-
-class BatchRequestInput(OpenAIBaseModel):
-    """
-    The per-line object of the batch input file.
-
-    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
-    """
-
-    # A developer-provided per-request id that will be used to match outputs to
-    # inputs. Must be unique for each request in a batch.
-    custom_id: str
-
-    # The HTTP method to be used for the request. Currently only POST is
-    # supported.
-    method: str
-
-    # The OpenAI API relative URL to be used for the request. Currently
-    # /v1/chat/completions is supported.
-    url: str
-
-    # The parameters of the request.
-    body: BatchRequestInputBody
-
-    @field_validator("body", mode="plain")
-    @classmethod
-    def check_type_for_url(cls, value: Any, info: ValidationInfo):
-        # Use url to disambiguate models
-        url: str = info.data["url"]
-        if url == "/v1/chat/completions":
-            return ChatCompletionRequest.model_validate(value)
-        if url == "/v1/embeddings":
-            return TypeAdapter(EmbeddingRequest).validate_python(value)
-        if url.endswith("/score"):
-            return ScoreRequest.model_validate(value)
-        if url.endswith("/rerank"):
-            return RerankRequest.model_validate(value)
-        return TypeAdapter(BatchRequestInputBody).validate_python(value)
-
-
-class BatchResponseData(OpenAIBaseModel):
-    # HTTP status code of the response.
-    status_code: int = 200
-
-    # An unique identifier for the API request.
-    request_id: str
-
-    # The body of the response.
-    body: (
-        ChatCompletionResponse
-        | EmbeddingResponse
-        | ScoreResponse
-        | RerankResponse
-        | None
-    ) = None
-
-
-class BatchRequestOutput(OpenAIBaseModel):
-    """
-    The per-line object of the batch output and error files
-    """
-
-    id: str
-
-    # A developer-provided per-request id that will be used to match outputs to
-    # inputs.
-    custom_id: str
-
-    response: BatchResponseData | None
-
-    # For requests that failed with a non-HTTP error, this will contain more
-    # information on the cause of the failure.
-    error: Any | None
-
-
 class TokenizeCompletionRequest(OpenAIBaseModel):
     model: str | None = None
     prompt: str

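For reference, the removed BatchRequestInput relied on a plain-mode Pydantic validator that dispatches on the already-validated url field to pick the right model for body. A minimal self-contained sketch of that pattern, using illustrative stand-in models (ChatBody, EmbeddingBody, BatchLine) rather than vLLM's real request classes:

from typing import Any

from pydantic import BaseModel, ValidationInfo, field_validator


class ChatBody(BaseModel):
    # Stand-in for ChatCompletionRequest.
    messages: list[dict]


class EmbeddingBody(BaseModel):
    # Stand-in for EmbeddingRequest.
    input: str


class BatchLine(BaseModel):
    custom_id: str
    method: str
    url: str
    body: Any

    @field_validator("body", mode="plain")
    @classmethod
    def check_type_for_url(cls, value: Any, info: ValidationInfo):
        # info.data holds the fields validated so far; url is visible here
        # only because it is declared before body on the model.
        url: str = info.data["url"]
        if url == "/v1/chat/completions":
            return ChatBody.model_validate(value)
        if url == "/v1/embeddings":
            return EmbeddingBody.model_validate(value)
        raise ValueError(f"unsupported url: {url}")


line = BatchLine.model_validate({
    "custom_id": "req-1",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {"messages": [{"role": "user", "content": "hi"}]},
})
assert isinstance(line.body, ChatBody)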
vllm/entrypoints/openai/serving_engine.py

Lines changed: 13 additions & 20 deletions

@@ -18,6 +18,11 @@
 from starlette.datastructures import Headers
 from typing_extensions import TypeIs
 
+from vllm.entrypoints.context import (
+    HarmonyContext,
+    ParsableContext,
+    StreamingHarmonyContext,
+)
 from vllm.entrypoints.pooling.classify.protocol import (
     ClassificationChatRequest,
     ClassificationCompletionRequest,
@@ -39,7 +44,6 @@
     ScoreRequest,
     ScoreResponse,
 )
-from vllm.entrypoints.context import HarmonyContext, StreamingHarmonyContext
 
 if sys.version_info >= (3, 12):
     from typing import TypedDict
@@ -1319,26 +1323,15 @@ async def _generate_with_builtin_tools(
             prompt_token_ids = context.render_for_completion()
             engine_prompt = EngineTokensPrompt(prompt_token_ids=prompt_token_ids)
             request_prompt = prompt_token_ids
-        else:
-            [
-                request,
-                tokenizer,
-                messages,
-                tool_dicts,
-                tool_parser,
-                chat_template,
-                chat_template_content_format,
-            ] = context.render_for_completion()
-
-            # HACK
+        elif isinstance(context, ParsableContext):
             request_prompts, engine_prompts = await self._render_next_turn(
-                request,
-                tokenizer,
-                messages,
-                tool_dicts,
-                tool_parser,
-                chat_template,
-                chat_template_content_format,
+                context.request,
+                context.tokenizer,
+                context.messages,
+                context.tool_dicts,
+                context.tool_parser,
+                context.chat_template,
+                context.chat_template_content_format,
             )
             engine_prompt = engine_prompts[0]
             request_prompt = request_prompts[0]

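The last hunk above drops the positional 7-element list unpacking (and its # HACK) in favor of an isinstance(context, ParsableContext) check with named attribute access, which is why the import hunk pulls ParsableContext into this module alongside HarmonyContext and StreamingHarmonyContext. A minimal sketch of that dispatch shape, with a hypothetical stand-in for the real ParsableContext from vllm.entrypoints.context:

from dataclasses import dataclass
from typing import Any


@dataclass
class ParsableContext:
    # Hypothetical stand-in listing only the fields the call site reads.
    request: Any
    tokenizer: Any
    messages: list[dict]
    tool_dicts: list[dict] | None
    tool_parser: Any
    chat_template: str | None
    chat_template_content_format: str


def render_args(context: Any) -> tuple:
    if isinstance(context, ParsableContext):
        # Named attributes replace the old positional unpacking, so adding
        # or reordering fields can no longer silently mismatch arguments.
        return (
            context.request,
            context.tokenizer,
            context.messages,
            context.tool_dicts,
            context.tool_parser,
            context.chat_template,
            context.chat_template_content_format,
        )
    raise TypeError(f"unexpected context type: {type(context).__name__}")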
vllm/entrypoints/responses_utils.py

Lines changed: 2 additions & 1 deletion

@@ -26,6 +26,7 @@
     ChatCompletionMessageParam,
     ResponseInputOutputItem,
 )
+from vllm.utils import random_uuid
 
 
 def make_response_output_items_from_parsable_context(
@@ -39,7 +40,7 @@ def make_response_output_items_from_parsable_context(
     else:
         if isinstance(output_messages[-1], ResponseFunctionToolCall):
             mcp_message = McpCall(
-                id="lol",
+                id=f"mcp_{random_uuid()}",
                 arguments=output_messages[-1].arguments,
                 name=output_messages[-1].name,
                 server_label=output_messages[-1].name,  # TODO

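The fix replaces the hard-coded placeholder id="lol" with an identifier derived from the newly imported vllm.utils.random_uuid. A small sketch of the resulting ID scheme, assuming random_uuid wraps uuid.uuid4() (a common implementation; verify against the repo):

import uuid


def random_uuid() -> str:
    # Assumed behavior of vllm.utils.random_uuid.
    return str(uuid.uuid4())


mcp_id = f"mcp_{random_uuid()}"
# e.g. "mcp_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed" -- unique per call.
print(mcp_id)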