Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion client-sdks/stainless/openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7755,6 +7755,7 @@ components:
OpenAIResponseObjectStream:
discriminator:
mapping:
error: '#/components/schemas/OpenAIResponseObjectStreamError'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
Expand Down Expand Up @@ -7865,7 +7866,9 @@ components:
title: OpenAIResponseObjectStreamResponseFailed
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
title: OpenAIResponseObjectStreamResponseCompleted
title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
- $ref: '#/components/schemas/OpenAIResponseObjectStreamError'
title: OpenAIResponseObjectStreamError
title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants)
OpenAIResponseObjectStreamResponseCompleted:
description: Streaming event indicating a response has been completed.
properties:
Expand Down Expand Up @@ -13197,6 +13200,38 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
OpenAIResponseObjectStreamError:
description: |-
Standalone error event emitted during streaming when an error occurs.

This is distinct from response.failed which is a response lifecycle event.
The error event signals transport/infrastructure-level errors to the client.
properties:
code:
anyOf:
- type: string
- type: 'null'
nullable: true
message:
title: Message
type: string
param:
anyOf:
- type: string
- type: 'null'
nullable: true
sequence_number:
title: Sequence Number
type: integer
type:
title: Type
type: string
enum:
- error
required:
- message
- sequence_number
title: OpenAIResponseObjectStreamError
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
Expand Down
37 changes: 36 additions & 1 deletion docs/static/deprecated-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4113,6 +4113,7 @@ components:
OpenAIResponseObjectStream:
discriminator:
mapping:
error: '#/components/schemas/OpenAIResponseObjectStreamError'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
Expand Down Expand Up @@ -4223,7 +4224,9 @@ components:
title: OpenAIResponseObjectStreamResponseFailed
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
title: OpenAIResponseObjectStreamResponseCompleted
title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
- $ref: '#/components/schemas/OpenAIResponseObjectStreamError'
title: OpenAIResponseObjectStreamError
title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants)
OpenAIResponseObjectStreamResponseCompleted:
description: Streaming event indicating a response has been completed.
properties:
Expand Down Expand Up @@ -9557,6 +9560,38 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
OpenAIResponseObjectStreamError:
description: |-
Standalone error event emitted during streaming when an error occurs.

This is distinct from response.failed which is a response lifecycle event.
The error event signals transport/infrastructure-level errors to the client.
properties:
code:
anyOf:
- type: string
- type: 'null'
nullable: true
message:
title: Message
type: string
param:
anyOf:
- type: string
- type: 'null'
nullable: true
sequence_number:
title: Sequence Number
type: integer
type:
title: Type
type: string
enum:
- error
required:
- message
- sequence_number
title: OpenAIResponseObjectStreamError
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
Expand Down
37 changes: 36 additions & 1 deletion docs/static/experimental-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4654,6 +4654,7 @@ components:
OpenAIResponseObjectStream:
discriminator:
mapping:
error: '#/components/schemas/OpenAIResponseObjectStreamError'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
Expand Down Expand Up @@ -4764,7 +4765,9 @@ components:
title: OpenAIResponseObjectStreamResponseFailed
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
title: OpenAIResponseObjectStreamResponseCompleted
title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
- $ref: '#/components/schemas/OpenAIResponseObjectStreamError'
title: OpenAIResponseObjectStreamError
title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants)
OpenAIResponseObjectStreamResponseCompleted:
description: Streaming event indicating a response has been completed.
properties:
Expand Down Expand Up @@ -9765,6 +9768,38 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
OpenAIResponseObjectStreamError:
description: |-
Standalone error event emitted during streaming when an error occurs.

This is distinct from response.failed which is a response lifecycle event.
The error event signals transport/infrastructure-level errors to the client.
properties:
code:
anyOf:
- type: string
- type: 'null'
nullable: true
message:
title: Message
type: string
param:
anyOf:
- type: string
- type: 'null'
nullable: true
sequence_number:
title: Sequence Number
type: integer
type:
title: Type
type: string
enum:
- error
required:
- message
- sequence_number
title: OpenAIResponseObjectStreamError
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
Expand Down
37 changes: 36 additions & 1 deletion docs/static/llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6703,6 +6703,7 @@ components:
OpenAIResponseObjectStream:
discriminator:
mapping:
error: '#/components/schemas/OpenAIResponseObjectStreamError'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
Expand Down Expand Up @@ -6813,7 +6814,9 @@ components:
title: OpenAIResponseObjectStreamResponseFailed
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
title: OpenAIResponseObjectStreamResponseCompleted
title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
- $ref: '#/components/schemas/OpenAIResponseObjectStreamError'
title: OpenAIResponseObjectStreamError
title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants)
OpenAIResponseObjectStreamResponseCompleted:
description: Streaming event indicating a response has been completed.
properties:
Expand Down Expand Up @@ -12122,6 +12125,38 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
OpenAIResponseObjectStreamError:
description: |-
Standalone error event emitted during streaming when an error occurs.

This is distinct from response.failed which is a response lifecycle event.
The error event signals transport/infrastructure-level errors to the client.
properties:
code:
anyOf:
- type: string
- type: 'null'
nullable: true
message:
title: Message
type: string
param:
anyOf:
- type: string
- type: 'null'
nullable: true
sequence_number:
title: Sequence Number
type: integer
type:
title: Type
type: string
enum:
- error
required:
- message
- sequence_number
title: OpenAIResponseObjectStreamError
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
Expand Down
37 changes: 36 additions & 1 deletion docs/static/stainless-llama-stack-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7755,6 +7755,7 @@ components:
OpenAIResponseObjectStream:
discriminator:
mapping:
error: '#/components/schemas/OpenAIResponseObjectStreamError'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded'
response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone'
Expand Down Expand Up @@ -7865,7 +7866,9 @@ components:
title: OpenAIResponseObjectStreamResponseFailed
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
title: OpenAIResponseObjectStreamResponseCompleted
title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants)
- $ref: '#/components/schemas/OpenAIResponseObjectStreamError'
title: OpenAIResponseObjectStreamError
title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants)
OpenAIResponseObjectStreamResponseCompleted:
description: Streaming event indicating a response has been completed.
properties:
Expand Down Expand Up @@ -13197,6 +13200,38 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
OpenAIResponseObjectStreamError:
description: |-
Standalone error event emitted during streaming when an error occurs.

This is distinct from response.failed which is a response lifecycle event.
The error event signals transport/infrastructure-level errors to the client.
properties:
code:
anyOf:
- type: string
- type: 'null'
nullable: true
message:
title: Message
type: string
param:
anyOf:
- type: string
- type: 'null'
nullable: true
sequence_number:
title: Sequence Number
type: integer
type:
title: Type
type: string
enum:
- error
required:
- message
- sequence_number
title: OpenAIResponseObjectStreamError
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
Expand Down
2 changes: 2 additions & 0 deletions src/llama_stack_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamError,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseContentPartAdded,
OpenAIResponseObjectStreamResponseContentPartDone,
Expand Down Expand Up @@ -889,6 +890,7 @@
"OpenAIResponseMessage",
"OpenAIResponseObject",
"OpenAIResponseObjectStream",
"OpenAIResponseObjectStreamError",
"OpenAIResponseObjectStreamResponseCompleted",
"OpenAIResponseObjectStreamResponseContentPartAdded",
"OpenAIResponseObjectStreamResponseContentPartDone",
Expand Down
24 changes: 23 additions & 1 deletion src/llama_stack_api/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,27 @@ class OpenAIResponseObjectStreamResponseFailed(BaseModel):
type: Literal["response.failed"] = "response.failed"


@json_schema_type
class OpenAIResponseObjectStreamError(BaseModel):
    """Standalone error event emitted during streaming when an error occurs.

    This is distinct from response.failed which is a response lifecycle event.
    The error event signals transport/infrastructure-level errors to the client.

    :param code: The error code (e.g. "server_error", "rate_limit_exceeded")
    :param message: A human-readable description of the error
    :param param: The parameter that caused the error, if applicable
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "error"
    """

    # Optional machine-readable error code; None when no specific code applies.
    code: str | None = None
    message: str
    # Optional name of the parameter that caused the error; None when not applicable.
    param: str | None = None
    sequence_number: int
    # Fixed literal acts as the discriminator value ("type") for the
    # OpenAIResponseObjectStream tagged union registered in this module.
    type: Literal["error"] = "error"


@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
"""Streaming event for when a new output item is added to the response.
Expand Down Expand Up @@ -1468,7 +1489,8 @@ class OpenAIResponseObjectStreamResponseFileSearchCallCompleted(BaseModel):
| OpenAIResponseObjectStreamResponseFileSearchCallCompleted
| OpenAIResponseObjectStreamResponseIncomplete
| OpenAIResponseObjectStreamResponseFailed
| OpenAIResponseObjectStreamResponseCompleted,
| OpenAIResponseObjectStreamResponseCompleted
| OpenAIResponseObjectStreamError,
Field(discriminator="type"),
]
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
Expand Down
17 changes: 14 additions & 3 deletions src/llama_stack_api/responses/fastapi_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from llama_stack_api.common.errors import OpenAIErrorResponse
from llama_stack_api.common.responses import Order
from llama_stack_api.openai_responses import (
ListOpenAIResponseInputItem,
ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseObject,
OpenAIResponseObjectStreamError,
)
from llama_stack_api.router_utils import (
ExceptionTranslatingRoute,
Expand Down Expand Up @@ -124,8 +124,13 @@ async def sse_generator(event_gen: AsyncIterator[Any]) -> AsyncIterator[str]:
This function iterates over an async generator and formats each yielded
item as a Server-Sent Event.
"""
# Track the last sequence_number seen so that if an error occurs mid-stream,
# the error event can continue the sequence (last seen + 1).
sequence_number = 0
try:
async for item in event_gen:
if hasattr(item, "sequence_number"):
sequence_number = item.sequence_number
yield create_sse_event(item)
except asyncio.CancelledError:
if hasattr(event_gen, "aclose"):
Expand All @@ -134,9 +139,15 @@ async def sse_generator(event_gen: AsyncIterator[Any]) -> AsyncIterator[str]:
except Exception as e:
logger.exception("Error in SSE generator")
http_exc = try_translate_to_http_exception(e)
status_code = http_exc.status_code if http_exc else 500
status_code = str(http_exc.status_code) if http_exc else "server_error"
detail = http_exc.detail if http_exc else "Internal server error: An unexpected error occurred."
yield create_sse_event(OpenAIErrorResponse.from_message(detail, code=str(status_code)).to_dict())
yield create_sse_event(
OpenAIResponseObjectStreamError(
code=status_code,
message=detail,
sequence_number=sequence_number + 1,
)
)


# Automatically generate dependency functions from Pydantic models
Expand Down
Loading
Loading