diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 1c92a22080..be46239106 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -7755,6 +7755,7 @@ components: OpenAIResponseObjectStream: discriminator: mapping: + error: '#/components/schemas/OpenAIResponseObjectStreamError' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' @@ -7865,7 +7866,9 @@ components: title: OpenAIResponseObjectStreamResponseFailed - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' title: OpenAIResponseObjectStreamResponseCompleted - title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + - $ref: '#/components/schemas/OpenAIResponseObjectStreamError' + title: OpenAIResponseObjectStreamError + title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants) OpenAIResponseObjectStreamResponseCompleted: description: Streaming event indicating a response has been completed. properties: @@ -13197,6 +13200,38 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + OpenAIResponseObjectStreamError: + description: |- + Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + properties: + code: + anyOf: + - type: string + - type: 'null' + nullable: true + message: + title: Message + type: string + param: + anyOf: + - type: string + - type: 'null' + nullable: true + sequence_number: + title: Sequence Number + type: integer + type: + title: Type + type: string + enum: + - error + required: + - message + - sequence_number + title: OpenAIResponseObjectStreamError ListBenchmarksRequest: description: Request model for listing benchmarks. properties: {} diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 54d93bb7d8..60315a3a26 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -4113,6 +4113,7 @@ components: OpenAIResponseObjectStream: discriminator: mapping: + error: '#/components/schemas/OpenAIResponseObjectStreamError' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' @@ -4223,7 +4224,9 @@ components: title: OpenAIResponseObjectStreamResponseFailed - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' title: OpenAIResponseObjectStreamResponseCompleted - title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + - $ref: '#/components/schemas/OpenAIResponseObjectStreamError' + title: OpenAIResponseObjectStreamError + title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants) OpenAIResponseObjectStreamResponseCompleted: description: Streaming event indicating a response has been completed. properties: @@ -9557,6 +9560,38 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + OpenAIResponseObjectStreamError: + description: |- + Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + properties: + code: + anyOf: + - type: string + - type: 'null' + nullable: true + message: + title: Message + type: string + param: + anyOf: + - type: string + - type: 'null' + nullable: true + sequence_number: + title: Sequence Number + type: integer + type: + title: Type + type: string + enum: + - error + required: + - message + - sequence_number + title: OpenAIResponseObjectStreamError ListBenchmarksRequest: description: Request model for listing benchmarks. properties: {} diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 8529a64c29..fc24019360 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -4654,6 +4654,7 @@ components: OpenAIResponseObjectStream: discriminator: mapping: + error: '#/components/schemas/OpenAIResponseObjectStreamError' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' @@ -4764,7 +4765,9 @@ components: title: OpenAIResponseObjectStreamResponseFailed - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' title: OpenAIResponseObjectStreamResponseCompleted - title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + - $ref: '#/components/schemas/OpenAIResponseObjectStreamError' + title: OpenAIResponseObjectStreamError + title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants) OpenAIResponseObjectStreamResponseCompleted: description: Streaming event indicating a response has been completed. properties: @@ -9765,6 +9768,38 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + OpenAIResponseObjectStreamError: + description: |- + Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + properties: + code: + anyOf: + - type: string + - type: 'null' + nullable: true + message: + title: Message + type: string + param: + anyOf: + - type: string + - type: 'null' + nullable: true + sequence_number: + title: Sequence Number + type: integer + type: + title: Type + type: string + enum: + - error + required: + - message + - sequence_number + title: OpenAIResponseObjectStreamError ListBenchmarksRequest: description: Request model for listing benchmarks. properties: {} diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index a7516b40d7..44af7cdb9c 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -6703,6 +6703,7 @@ components: OpenAIResponseObjectStream: discriminator: mapping: + error: '#/components/schemas/OpenAIResponseObjectStreamError' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' @@ -6813,7 +6814,9 @@ components: title: OpenAIResponseObjectStreamResponseFailed - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' title: OpenAIResponseObjectStreamResponseCompleted - title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + - $ref: '#/components/schemas/OpenAIResponseObjectStreamError' + title: OpenAIResponseObjectStreamError + title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants) OpenAIResponseObjectStreamResponseCompleted: description: Streaming event indicating a response has been completed. properties: @@ -12122,6 +12125,38 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + OpenAIResponseObjectStreamError: + description: |- + Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + properties: + code: + anyOf: + - type: string + - type: 'null' + nullable: true + message: + title: Message + type: string + param: + anyOf: + - type: string + - type: 'null' + nullable: true + sequence_number: + title: Sequence Number + type: integer + type: + title: Type + type: string + enum: + - error + required: + - message + - sequence_number + title: OpenAIResponseObjectStreamError ListBenchmarksRequest: description: Request model for listing benchmarks. properties: {} diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 1c92a22080..be46239106 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7755,6 +7755,7 @@ components: OpenAIResponseObjectStream: discriminator: mapping: + error: '#/components/schemas/OpenAIResponseObjectStreamError' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' @@ -7865,7 +7866,9 @@ components: title: OpenAIResponseObjectStreamResponseFailed - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' title: OpenAIResponseObjectStreamResponseCompleted - title: OpenAIResponseObjectStreamResponseCreated | ... (36 variants) + - $ref: '#/components/schemas/OpenAIResponseObjectStreamError' + title: OpenAIResponseObjectStreamError + title: OpenAIResponseObjectStreamResponseCreated | ... (37 variants) OpenAIResponseObjectStreamResponseCompleted: description: Streaming event indicating a response has been completed. properties: @@ -13197,6 +13200,38 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText + OpenAIResponseObjectStreamError: + description: |- + Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + properties: + code: + anyOf: + - type: string + - type: 'null' + nullable: true + message: + title: Message + type: string + param: + anyOf: + - type: string + - type: 'null' + nullable: true + sequence_number: + title: Sequence Number + type: integer + type: + title: Type + type: string + enum: + - error + required: + - message + - sequence_number + title: OpenAIResponseObjectStreamError ListBenchmarksRequest: description: Request model for listing benchmarks. properties: {} diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index 3566ebb0fd..0b84c25733 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -376,6 +376,7 @@ OpenAIResponseMessage, OpenAIResponseObject, OpenAIResponseObjectStream, + OpenAIResponseObjectStreamError, OpenAIResponseObjectStreamResponseCompleted, OpenAIResponseObjectStreamResponseContentPartAdded, OpenAIResponseObjectStreamResponseContentPartDone, @@ -889,6 +890,7 @@ "OpenAIResponseMessage", "OpenAIResponseObject", "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamError", "OpenAIResponseObjectStreamResponseCompleted", "OpenAIResponseObjectStreamResponseContentPartAdded", "OpenAIResponseObjectStreamResponseContentPartDone", diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index c5ce42d940..7d5e45c192 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -869,6 +869,27 @@ class OpenAIResponseObjectStreamResponseFailed(BaseModel): type: Literal["response.failed"] = "response.failed" +@json_schema_type +class OpenAIResponseObjectStreamError(BaseModel): + """Standalone error event emitted during streaming when an error occurs. + + This is distinct from response.failed which is a response lifecycle event. + The error event signals transport/infrastructure-level errors to the client. + + :param code: The error code (e.g. "server_error", "rate_limit_exceeded") + :param message: A human-readable description of the error + :param param: The parameter that caused the error, if applicable + :param sequence_number: Sequential number for ordering streaming events + :param type: Event type identifier, always "error" + """ + + code: str | None = None + message: str + param: str | None = None + sequence_number: int + type: Literal["error"] = "error" + + @json_schema_type class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel): """Streaming event for when a new output item is added to the response. @@ -1468,7 +1489,8 @@ class OpenAIResponseObjectStreamResponseFileSearchCallCompleted(BaseModel): | OpenAIResponseObjectStreamResponseFileSearchCallCompleted | OpenAIResponseObjectStreamResponseIncomplete | OpenAIResponseObjectStreamResponseFailed - | OpenAIResponseObjectStreamResponseCompleted, + | OpenAIResponseObjectStreamResponseCompleted + | OpenAIResponseObjectStreamError, Field(discriminator="type"), ] register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream") diff --git a/src/llama_stack_api/responses/fastapi_routes.py b/src/llama_stack_api/responses/fastapi_routes.py index c916d18206..da99ad1cc4 100644 --- a/src/llama_stack_api/responses/fastapi_routes.py +++ b/src/llama_stack_api/responses/fastapi_routes.py @@ -21,13 +21,13 @@ from fastapi.responses import StreamingResponse from pydantic import BaseModel -from llama_stack_api.common.errors import OpenAIErrorResponse from llama_stack_api.common.responses import Order from llama_stack_api.openai_responses import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, OpenAIResponseObject, + OpenAIResponseObjectStreamError, ) from llama_stack_api.router_utils import ( ExceptionTranslatingRoute, @@ -124,8 +124,13 @@ async def sse_generator(event_gen: AsyncIterator[Any]) -> AsyncIterator[str]: This function iterates over an async generator and formats each yielded item as a Server-Sent Event. """ + # Track the last sequence_number seen so that if an error occurs mid-stream, + # the error event can continue the sequence (last seen + 1). + sequence_number = 0 try: async for item in event_gen: + if hasattr(item, "sequence_number"): + sequence_number = item.sequence_number yield create_sse_event(item) except asyncio.CancelledError: if hasattr(event_gen, "aclose"): @@ -134,9 +139,15 @@ async def sse_generator(event_gen: AsyncIterator[Any]) -> AsyncIterator[str]: except Exception as e: logger.exception("Error in SSE generator") http_exc = try_translate_to_http_exception(e) - status_code = http_exc.status_code if http_exc else 500 + status_code = str(http_exc.status_code) if http_exc else "server_error" detail = http_exc.detail if http_exc else "Internal server error: An unexpected error occurred." - yield create_sse_event(OpenAIErrorResponse.from_message(detail, code=str(status_code)).to_dict()) + yield create_sse_event( + OpenAIResponseObjectStreamError( + code=status_code, + message=detail, + sequence_number=sequence_number + 1, + ) + ) # Automatically generate dependency functions from Pydantic models diff --git a/tests/unit/core/routers/test_responses_router.py b/tests/unit/core/routers/test_responses_router.py index 72cc8ebecf..3101c963c4 100644 --- a/tests/unit/core/routers/test_responses_router.py +++ b/tests/unit/core/routers/test_responses_router.py @@ -240,8 +240,9 @@ async def _stream(): break assert first_event is not None - assert '"code": "400"' in first_event - assert '"message": "not found"' in first_event + assert '"type":"error"' in first_event + assert '"code":"400"' in first_event + assert '"message":"not found"' in first_event async def test_get_response_returns_response_object():