
Commit f76c9c8

robinnarsinghranabhatrranabha authored and committed
fix: emit response.output_text.done streaming event per OpenAI spec
The LlamaStack server was missing the `response.output_text.done` streaming event, which the OpenAI Responses API spec requires between `output_text.delta` and `content_part.done`. This event carries the final accumulated text and logprobs.

Discovered by comparing streaming event sequences between OpenAI's gpt-5.1 (ground truth) and LlamaStack server output using the OpenAI Python client.

Changes:
- streaming.py: Import and emit OutputTextDone with the final text and logprobs before content_part.done
- openai_responses.py: Add a logprobs field to the OutputTextDone type definition (required per OpenAI spec)
- test_openai_responses.py: Verify output_text.done is emitted with the correct fields and ordering (before content_part.done)
1 parent f30e486 commit f76c9c8

8 files changed

Lines changed: 67 additions & 2 deletions
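
For context, the mismatch described in the commit message can be checked client-side by recording the event types a Responses API server emits while streaming. The sketch below is illustrative only; the base URL, API key, and model name are placeholders, not values taken from this commit.

    # Hedged sketch: capture the streaming event sequence and check the ordering
    # this commit is about. Endpoint, key, and model are hypothetical.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # hypothetical LlamaStack endpoint

    event_types = []
    for event in client.responses.create(model="my-model", input="Say hello", stream=True):
        event_types.append(event.type)

    # Per the OpenAI spec, output_text.done should appear after the text deltas
    # and before content_part.done.
    assert "response.output_text.done" in event_types
    assert event_types.index("response.output_text.done") < event_types.index("response.content_part.done")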


client-sdks/stainless/openapi.yml

Lines changed: 7 additions & 0 deletions
@@ -8476,6 +8476,13 @@ components:
         sequence_number:
           title: Sequence Number
           type: integer
+        logprobs:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAITokenLogProb'
+              type: array
+            - type: 'null'
+          nullable: true
         type:
           const: response.output_text.done
           default: response.output_text.done
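
The same optional logprobs field is added to each of the generated spec files below. For illustration only, an event payload consistent with the updated schema might look like the following; the values are invented, and logprobs may be either null or a list of OpenAITokenLogProb objects.

    # Illustrative payload (values are made up, not taken from the commit).
    output_text_done_event = {
        "type": "response.output_text.done",
        "item_id": "msg_abc123",   # hypothetical item id
        "output_index": 0,
        "content_index": 0,
        "text": "Hello there!",
        "logprobs": None,          # nullable per the new schema
        "sequence_number": 7,
    }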

docs/static/deprecated-llama-stack-spec.yaml

Lines changed: 7 additions & 0 deletions
@@ -5177,6 +5177,13 @@ components:
         sequence_number:
           title: Sequence Number
           type: integer
+        logprobs:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAITokenLogProb'
+              type: array
+            - type: 'null'
+          nullable: true
         type:
           const: response.output_text.done
           default: response.output_text.done

docs/static/experimental-llama-stack-spec.yaml

Lines changed: 7 additions & 0 deletions
@@ -5358,6 +5358,13 @@ components:
         sequence_number:
           title: Sequence Number
           type: integer
+        logprobs:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAITokenLogProb'
+              type: array
+            - type: 'null'
+          nullable: true
         type:
           const: response.output_text.done
           default: response.output_text.done

docs/static/llama-stack-spec.yaml

Lines changed: 7 additions & 0 deletions
@@ -7361,6 +7361,13 @@ components:
         sequence_number:
           title: Sequence Number
           type: integer
+        logprobs:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAITokenLogProb'
+              type: array
+            - type: 'null'
+          nullable: true
         type:
           const: response.output_text.done
           default: response.output_text.done

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 7 additions & 0 deletions
@@ -8476,6 +8476,13 @@ components:
         sequence_number:
           title: Sequence Number
           type: integer
+        logprobs:
+          anyOf:
+            - items:
+                $ref: '#/components/schemas/OpenAITokenLogProb'
+              type: array
+            - type: 'null'
+          nullable: true
         type:
           const: response.output_text.done
           default: response.output_text.done

src/llama_stack/providers/inline/responses/builtin/responses/streaming.py

Lines changed: 12 additions & 1 deletion
@@ -74,6 +74,7 @@
     OpenAIResponseObjectStreamResponseOutputItemAdded,
     OpenAIResponseObjectStreamResponseOutputItemDone,
     OpenAIResponseObjectStreamResponseOutputTextDelta,
+    OpenAIResponseObjectStreamResponseOutputTextDone,
     OpenAIResponseObjectStreamResponseReasoningTextDelta,
     OpenAIResponseObjectStreamResponseReasoningTextDone,
     OpenAIResponseObjectStreamResponseRefusalDelta,
@@ -1145,9 +1146,19 @@ async def _process_streaming_chunks(
                 sequence_number=self.sequence_number,
             )
 
-        # Emit content_part.done event if text content was streamed (before content gets cleared)
+        # Emit output_text.done and content_part.done events if text content was streamed
         if content_part_emitted:
             final_text = "".join(chat_response_content)
+            # Emit output_text.done with the final accumulated text (per OpenAI protocol)
+            self.sequence_number += 1
+            yield OpenAIResponseObjectStreamResponseOutputTextDone(
+                content_index=content_index,
+                text=final_text,
+                item_id=message_item_id,
+                output_index=message_output_index,
+                sequence_number=self.sequence_number,
+                logprobs=chat_response_logprobs if chat_response_logprobs else [],
+            )
             self.sequence_number += 1
             yield OpenAIResponseObjectStreamResponseContentPartDone(
                 content_index=content_index,
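
With the event now emitted, a streaming consumer can take the final text directly from output_text.done instead of re-joining the deltas. A rough usage sketch, again with a placeholder endpoint and model name:

    # Hedged consumer-side sketch; endpoint, key, and model are hypothetical.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

    final_text = None
    for event in client.responses.create(model="my-model", input="Tell me a joke", stream=True):
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)   # incremental rendering
        elif event.type == "response.output_text.done":
            final_text = event.text                  # full accumulated text in one field
    print()
    print("final:", final_text)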

src/llama_stack_api/openai_responses.py

Lines changed: 2 additions & 0 deletions
@@ -928,6 +928,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
     :param item_id: Unique identifier of the completed output item
     :param output_index: Index position of the item in the output list
     :param sequence_number: Sequential number for ordering streaming events
+    :param logprobs: Token log probability details for the completed text
     :param type: Event type identifier, always "response.output_text.done"
     """
 
@@ -936,6 +937,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
     item_id: str
     output_index: int
     sequence_number: int
+    logprobs: list[OpenAITokenLogProb] | None = None
     type: Literal["response.output_text.done"] = "response.output_text.done"
 
 
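
Because the new logprobs field defaults to None, events constructed without it remain valid. A small sketch of this (the import path is assumed from the file location above, and serialization assumes Pydantic v2):

    # Assumed import path, based on src/llama_stack_api/openai_responses.py.
    from llama_stack_api.openai_responses import (
        OpenAIResponseObjectStreamResponseOutputTextDone,
    )

    event = OpenAIResponseObjectStreamResponseOutputTextDone(
        content_index=0,
        text="Hello there!",
        item_id="msg_abc123",   # hypothetical id
        output_index=0,
        sequence_number=7,
    )
    assert event.logprobs is None                      # optional field defaults to None
    assert event.type == "response.output_text.done"
    print(event.model_dump_json())                     # Pydantic v2 serialization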

tests/unit/providers/responses/builtin/test_openai_responses.py

Lines changed: 18 additions & 1 deletion
@@ -241,7 +241,8 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     )
 
     # Should have content part events for text streaming
-    # Expected: response.created, response.in_progress, content_part.added, output_text.delta, content_part.done, response.completed
+    # Expected: response.created, response.in_progress, output_item.added, content_part.added,
+    # output_text.delta, output_text.done, content_part.done, output_item.done, response.completed
     assert len(chunks) >= 5
     assert chunks[0].type == "response.created"
     assert any(chunk.type == "response.in_progress" for chunk in chunks)
@@ -250,10 +251,12 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     content_part_added_events = [c for c in chunks if c.type == "response.content_part.added"]
     content_part_done_events = [c for c in chunks if c.type == "response.content_part.done"]
     text_delta_events = [c for c in chunks if c.type == "response.output_text.delta"]
+    text_done_events = [c for c in chunks if c.type == "response.output_text.done"]
 
     assert len(content_part_added_events) >= 1, "Should have content_part.added event for text"
     assert len(content_part_done_events) >= 1, "Should have content_part.done event for text"
     assert len(text_delta_events) >= 1, "Should have text delta events"
+    assert len(text_done_events) >= 1, "Should have output_text.done event with final accumulated text"
 
     added_event = content_part_added_events[0]
     done_event = content_part_done_events[0]
@@ -263,6 +266,20 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     assert added_event.item_id == done_event.item_id
     assert added_event.response_id == done_event.response_id
 
+    # Verify output_text.done contains the final accumulated text and correct indices
+    text_done_event = text_done_events[0]
+    assert text_done_event.content_index == 0
+    assert text_done_event.output_index == 0
+    assert text_done_event.item_id == added_event.item_id
+    assert isinstance(text_done_event.text, str)
+    assert len(text_done_event.text) > 0, "output_text.done should contain the final text"
+
+    # Verify output_text.done comes before content_part.done (per OpenAI protocol)
+    chunk_types = [c.type for c in chunks]
+    text_done_idx = chunk_types.index("response.output_text.done")
+    content_done_idx = chunk_types.index("response.content_part.done")
+    assert text_done_idx < content_done_idx, "output_text.done must precede content_part.done"
+
     # Verify final event is completion
     assert chunks[-1].type == "response.completed"
 
