Skip to content

Commit cc9f253

Browse files
Fix SSE handling for speech models (#436)
* Fix SSE handling for speech models
* Fix integration tests
1 parent 35fd835 commit cc9f253

File tree

5 files changed

+8
-40
lines changed

5 files changed

+8
-40
lines changed

src/together/resources/audio/speech.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
AudioLanguage,
1111
AudioResponseEncoding,
1212
AudioSpeechStreamChunk,
13-
AudioSpeechStreamEvent,
1413
AudioSpeechStreamResponse,
1514
TogetherClient,
1615
TogetherRequest,

src/together/types/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
AudioResponseFormat,
66
AudioSpeechRequest,
77
AudioSpeechStreamChunk,
8-
AudioSpeechStreamEvent,
98
AudioSpeechStreamResponse,
109
AudioTimestampGranularities,
1110
AudioTranscriptionRequest,
@@ -134,7 +133,6 @@
134133
"AudioLanguage",
135134
"AudioResponseEncoding",
136135
"AudioSpeechStreamChunk",
137-
"AudioSpeechStreamEvent",
138136
"AudioSpeechStreamResponse",
139137
"AudioTranscriptionRequest",
140138
"AudioTranslationRequest",

src/together/types/audio_speech.py

Lines changed: 6 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,6 @@ class AudioResponseEncoding(str, Enum):
4141
PCM_ALAW = "pcm_alaw"
4242

4343

44-
class AudioObjectType(str, Enum):
45-
AUDIO_TTS_CHUNK = "audio.tts.chunk"
46-
47-
48-
class StreamSentinelType(str, Enum):
49-
DONE = "[DONE]"
50-
51-
5244
class AudioSpeechRequest(BaseModel):
5345
model: str
5446
input: str
@@ -61,21 +53,8 @@ class AudioSpeechRequest(BaseModel):
6153

6254

6355
class AudioSpeechStreamChunk(BaseModel):
64-
object: AudioObjectType = AudioObjectType.AUDIO_TTS_CHUNK
65-
model: str
66-
b64: str
67-
68-
69-
class AudioSpeechStreamEvent(BaseModel):
70-
data: AudioSpeechStreamChunk
71-
72-
73-
class StreamSentinel(BaseModel):
74-
data: StreamSentinelType = StreamSentinelType.DONE
75-
76-
77-
class AudioSpeechStreamEventResponse(BaseModel):
78-
response: AudioSpeechStreamEvent | StreamSentinel
56+
type: str = "conversation.item.audio_output.delta"
57+
delta: str
7958

8059

8160
class AudioSpeechStreamResponse(BaseModel):
@@ -127,18 +106,10 @@ def stream_to_file(
127106
if isinstance(chunk.data, bytes):
128107
audio_chunks.append(chunk.data)
129108
elif isinstance(chunk.data, dict):
130-
# SSE format with JSON/base64
131-
try:
132-
stream_event = AudioSpeechStreamEventResponse(
133-
response={"data": chunk.data}
134-
)
135-
if isinstance(stream_event.response, StreamSentinel):
136-
break
137-
audio_chunks.append(
138-
base64.b64decode(stream_event.response.data.b64)
139-
)
140-
except Exception:
141-
continue # Skip malformed chunks
109+
# SSE format: {"type": "conversation.item.audio_output.delta", "delta": "<base64>"}
110+
delta = chunk.data.get("delta")
111+
if delta:
112+
audio_chunks.append(base64.b64decode(delta))
142113

143114
if not audio_chunks:
144115
raise ValueError("No audio data received in streaming response")

tests/integration/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
completion_test_model_list = [
2-
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
2+
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
33
]
44
chat_test_model_list = []
55
embedding_test_model_list = []

tests/integration/resources/test_completion_stream.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_create(
3535
random_repetition_penalty, # noqa
3636
) -> None:
3737
prompt = "The space robots have"
38-
model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
38+
model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
3939
stop = ["</s>"]
4040

4141
# max_tokens should be a reasonable number for this test

0 commit comments

Comments
 (0)