Skip to content

Commit d257f92

Browse files
zkleb-aai and AssemblyAI authored
chore: sync sdk code with DeepLearning repo (#199)
Co-authored-by: AssemblyAI <engineering.sdk@assemblyai.com>
1 parent 55eea33 commit d257f92

15 files changed

Lines changed: 2191 additions & 1485 deletions

README.md

Lines changed: 167 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -699,79 +699,187 @@ for result in transcript.auto_highlights.results:
699699

700700
### **Streaming Examples**
701701

702-
[Read more about our streaming service.](https://www.assemblyai.com/docs/streaming/universal-3-pro)
702+
Real-time speech-to-text via WebSocket against the `u3-rt-pro` model. The SDK ships two clients with identical option/event/handler surfaces — `StreamingClient` (threaded) and `AsyncStreamingClient` (asyncio). Pick whichever fits your codebase.
703+
704+
**Handler contract**: every handler is called as `handler(client, event)`. Plain functions and `async def` functions both work; `AsyncStreamingClient` awaits async handlers inline on the read task, so don't block — use `asyncio.create_task(...)` if you need concurrent work.
705+
706+
[Read more about the streaming service.](https://www.assemblyai.com/docs/streaming/universal-3-pro)
707+
708+
<details>
709+
<summary>Stream a local file (sync)</summary>
710+
711+
```python
712+
import assemblyai as aai
713+
from assemblyai.streaming.v3 import (
714+
BeginEvent, StreamingClient, StreamingClientOptions, StreamingError,
715+
StreamingEvents, StreamingParameters, TerminationEvent, TurnEvent,
716+
)
717+
718+
def on_begin(client, event: BeginEvent):
719+
print(f"Session started: {event.id}")
720+
721+
def on_turn(client, event: TurnEvent):
722+
print(f"{event.transcript} (end_of_turn={event.end_of_turn})")
723+
724+
def on_terminated(client, event: TerminationEvent):
725+
print(f"Done: {event.audio_duration_seconds}s of audio processed")
726+
727+
def on_error(client, error: StreamingError):
728+
print(f"Error: {error} (code={error.code})")
729+
730+
client = StreamingClient(StreamingClientOptions(api_key="<YOUR_API_KEY>"))
731+
client.on(StreamingEvents.Begin, on_begin)
732+
client.on(StreamingEvents.Turn, on_turn)
733+
client.on(StreamingEvents.Termination, on_terminated)
734+
client.on(StreamingEvents.Error, on_error)
735+
736+
client.connect(StreamingParameters(
737+
sample_rate=16000, speech_model="u3-rt-pro", format_turns=True,
738+
))
739+
try:
740+
client.stream(aai.extras.stream_file(filepath="audio.wav", sample_rate=16000))
741+
finally:
742+
client.disconnect(terminate=True)
743+
```
744+
745+
</details>
703746

704747
<details>
705-
<summary>Stream your microphone in real-time</summary>
748+
<summary>Stream your microphone (sync)</summary>
749+
750+
`MicrophoneStream` requires PyAudio:
706751

707752
```bash
708-
pip install -U assemblyai
753+
pip install -U "assemblyai[extras]"
709754
```
710755

711756
```python
712-
import logging
713-
from typing import Type
714-
715757
import assemblyai as aai
716758
from assemblyai.streaming.v3 import (
717-
BeginEvent,
718-
StreamingClient,
719-
StreamingClientOptions,
720-
StreamingError,
721-
StreamingEvents,
722-
StreamingParameters,
723-
TurnEvent,
724-
TerminationEvent,
759+
StreamingClient, StreamingClientOptions, StreamingEvents, StreamingParameters,
725760
)
726761

727-
api_key = "<YOUR_API_KEY>"
762+
def on_turn(client, event):
763+
print(f"{event.transcript} (end_of_turn={event.end_of_turn})")
728764

729-
logging.basicConfig(level=logging.INFO)
730-
logger = logging.getLogger(__name__)
765+
client = StreamingClient(StreamingClientOptions(api_key="<YOUR_API_KEY>"))
766+
client.on(StreamingEvents.Turn, on_turn)
767+
client.connect(StreamingParameters(sample_rate=16000, speech_model="u3-rt-pro"))
731768

732-
def on_begin(self: Type[StreamingClient], event: BeginEvent):
733-
print(f"Session started: {event.id}")
769+
try:
770+
client.stream(aai.extras.MicrophoneStream(sample_rate=16000))
771+
finally:
772+
client.disconnect(terminate=True)
773+
```
774+
775+
</details>
776+
777+
<details>
778+
<summary>Stream a local file (async)</summary>
779+
780+
`AsyncStreamingClient` mirrors `StreamingClient` with async methods. It's safe to use as an async context manager — `disconnect()` runs on block exit even if user code raises. Don't pass `extras.stream_file` directly (it uses blocking `time.sleep`); pace from an async generator instead.
781+
782+
```python
783+
import asyncio
784+
from assemblyai.streaming.v3 import (
785+
AsyncStreamingClient, StreamingClientOptions, StreamingEvents, StreamingParameters,
786+
)
787+
788+
async def stream_file_async(path: str, sample_rate: int, chunk_duration: float = 0.3):
789+
bytes_per_chunk = int(sample_rate * chunk_duration) * 2
790+
with open(path, "rb") as f:
791+
while chunk := f.read(bytes_per_chunk):
792+
yield chunk
793+
await asyncio.sleep(chunk_duration)
794+
795+
async def on_turn(client, event):
796+
print(f"{event.transcript} (end_of_turn={event.end_of_turn})")
797+
798+
async def main():
799+
async with AsyncStreamingClient(StreamingClientOptions(api_key="<YOUR_API_KEY>")) as client:
800+
client.on(StreamingEvents.Turn, on_turn)
801+
await client.connect(StreamingParameters(
802+
sample_rate=16000, speech_model="u3-rt-pro", format_turns=True,
803+
))
804+
await client.stream(stream_file_async("audio.wav", 16000))
805+
806+
asyncio.run(main())
807+
```
808+
809+
</details>
734810

735-
def on_turn(self: Type[StreamingClient], event: TurnEvent):
736-
print(f"{event.transcript} ({event.end_of_turn})")
737-
738-
def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
739-
print(
740-
f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"
741-
)
742-
743-
def on_error(self: Type[StreamingClient], error: StreamingError):
744-
print(f"Error occurred: {error}")
745-
746-
def main():
747-
client = StreamingClient(
748-
StreamingClientOptions(
749-
api_key=api_key,
750-
api_host="streaming.assemblyai.com",
751-
)
752-
)
753-
754-
client.on(StreamingEvents.Begin, on_begin)
755-
client.on(StreamingEvents.Turn, on_turn)
756-
client.on(StreamingEvents.Termination, on_terminated)
757-
client.on(StreamingEvents.Error, on_error)
758-
759-
client.connect(
760-
StreamingParameters(
761-
sample_rate=16000,
762-
speech_model="u3-rt-pro",
763-
)
764-
)
765-
766-
try:
767-
client.stream(
768-
aai.extras.MicrophoneStream(sample_rate=16000)
769-
)
770-
finally:
771-
client.disconnect(terminate=True)
772-
773-
if __name__ == "__main__":
774-
main()
811+
<details>
812+
<summary>Handle errors</summary>
813+
814+
Server-side errors arrive on the `Error` event rather than being raised. The handler receives a `StreamingError` (an `Exception` subclass) with `.code: int | None` — **not** the wire `ErrorEvent` class.
815+
816+
`StreamingErrorCodes` is a `dict[int, str]` mapping wire codes to human-readable messages. Use `.get(...)` for lookup:
817+
818+
```python
819+
from assemblyai.streaming.v3 import StreamingErrorCodes
820+
821+
def on_error(client, error):
822+
message = StreamingErrorCodes.get(error.code, str(error))
823+
print(f"Streaming error {error.code}: {message}")
824+
```
825+
826+
Common codes: `4001` Not Authorized, `4002` Insufficient Funds, `4029` Client sent audio too fast, `4031` Session idle for too long.
827+
828+
</details>
829+
830+
<details>
831+
<summary>Change settings mid-session</summary>
832+
833+
`set_params` updates an active session. Typical use: enable turn formatting (punctuation, casing) only on confirmed end-of-turn so partial transcripts stay raw:
834+
835+
```python
836+
from assemblyai.streaming.v3 import StreamingSessionParameters
837+
838+
def on_turn(client, event):
839+
if event.end_of_turn and not event.turn_is_formatted:
840+
client.set_params(StreamingSessionParameters(format_turns=True))
841+
```
842+
843+
For voice agents, `force_endpoint()` flushes the current turn — useful when an external signal (UI button, barge-in detection) determines the user has stopped speaking before VAD does:
844+
845+
```python
846+
client.force_endpoint() # ends the current turn immediately
847+
```
848+
849+
</details>
850+
851+
<details>
852+
<summary>Temporary tokens for browser / edge clients</summary>
853+
854+
Don't ship your API key to browsers. Mint a short-lived token server-side and pass it to the client.
855+
856+
**Sync server (Flask / WSGI / scripts):**
857+
```python
858+
client = StreamingClient(StreamingClientOptions(api_key="<YOUR_API_KEY>"))
859+
token = client.create_temporary_token(expires_in_seconds=60)
860+
# Send `token` to the browser, which connects with StreamingClientOptions(token=token).
861+
```
862+
863+
**Async server (FastAPI / asyncio):** always wrap in `async with` even though you don't call `connect()` — `create_temporary_token` lazily opens an `httpx.AsyncClient` pool. The context manager closes it on exit; without it you leak a pool every request.
864+
865+
```python
866+
from fastapi import FastAPI
867+
from assemblyai.streaming.v3 import AsyncStreamingClient, StreamingClientOptions
868+
869+
app = FastAPI()
870+
MASTER_KEY = "<YOUR_API_KEY>"
871+
872+
@app.get("/streaming-token")
873+
async def streaming_token():
874+
async with AsyncStreamingClient(StreamingClientOptions(api_key=MASTER_KEY)) as client:
875+
return {"token": await client.create_temporary_token(expires_in_seconds=60)}
876+
```
877+
878+
**Browser / edge client:** pass the token via `StreamingClientOptions(token=...)`:
879+
880+
```python
881+
client = StreamingClient(StreamingClientOptions(token="<TOKEN_FROM_SERVER>"))
882+
client.connect(StreamingParameters(sample_rate=16000, speech_model="u3-rt-pro"))
775883
```
776884

777885
</details>

assemblyai/__init__.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
from .__version__ import __version__
33
from .client import Client
44
from .lemur import Lemur
5-
from .transcriber import RealtimeTranscriber, Transcriber, Transcript, TranscriptGroup
5+
from .transcriber import Transcriber, Transcript, TranscriptGroup
66
from .types import (
77
AssemblyAIError,
8-
AudioEncoding,
98
AutohighlightResponse,
109
AutohighlightResult,
1110
Chapter,
@@ -47,13 +46,6 @@
4746
PIIRedactionPolicy,
4847
PIISubstitutionPolicy,
4948
RawTranscriptionConfig,
50-
RealtimeError,
51-
RealtimeFinalTranscript,
52-
RealtimePartialTranscript,
53-
RealtimeSessionInformation,
54-
RealtimeSessionOpened,
55-
RealtimeTranscript,
56-
RealtimeWord,
5749
RedactPiiAudioOptions,
5850
Sentence,
5951
Sentiment,
@@ -93,7 +85,6 @@
9385
__all__ = [
9486
# types
9587
"AssemblyAIError",
96-
"AudioEncoding",
9788
"AutohighlightResponse",
9889
"AutohighlightResult",
9990
"Chapter",
@@ -170,14 +161,6 @@
170161
"Word",
171162
"WordBoost",
172163
"WordSearchMatch",
173-
"RealtimeTranscriber",
174-
"RealtimeError",
175-
"RealtimeFinalTranscript",
176-
"RealtimePartialTranscript",
177-
"RealtimeSessionInformation",
178-
"RealtimeSessionOpened",
179-
"RealtimeTranscript",
180-
"RealtimeWord",
181164
# package globals
182165
"settings",
183166
# packages

assemblyai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.64.2"
1+
__version__ = "0.64.3"

assemblyai/api.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
ENDPOINT_UPLOAD = "/v2/upload"
1010
ENDPOINT_LEMUR_BASE = "/lemur/v3"
1111
ENDPOINT_LEMUR = f"{ENDPOINT_LEMUR_BASE}/generate"
12-
ENDPOINT_REALTIME_WEBSOCKET = "/v2/realtime/ws"
13-
ENDPOINT_REALTIME_TOKEN = "/v2/realtime/token"
1412

1513

1614
def _get_error_message(response: httpx.Response) -> str:
@@ -415,24 +413,3 @@ def lemur_get_response_data(
415413
return types.LemurQuestionResponse.parse_obj(json_data)
416414

417415
return types.LemurStringResponse.parse_obj(json_data)
418-
419-
420-
def create_temporary_token(
421-
client: httpx.Client,
422-
request: types.RealtimeCreateTemporaryTokenRequest,
423-
http_timeout: Optional[float],
424-
) -> str:
425-
response = client.post(
426-
f"{ENDPOINT_REALTIME_TOKEN}",
427-
json=request.dict(exclude_none=True),
428-
timeout=http_timeout,
429-
)
430-
431-
if response.status_code != httpx.codes.OK:
432-
raise types.AssemblyAIError(
433-
f"Failed to create temporary token: {_get_error_message(response)}",
434-
response.status_code,
435-
)
436-
437-
data = types.RealtimeCreateTemporaryTokenResponse.parse_obj(response.json())
438-
return data.token

assemblyai/streaming/v3/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from .async_client import AsyncStreamingClient
12
from .client import StreamingClient
23
from .models import (
34
BeginEvent,
@@ -9,6 +10,7 @@
910
SpeechStartedEvent,
1011
StreamingClientOptions,
1112
StreamingError,
13+
StreamingErrorCodes,
1214
StreamingEvents,
1315
StreamingParameters,
1416
StreamingPiiPolicy,
@@ -21,6 +23,7 @@
2123
)
2224

2325
__all__ = [
26+
"AsyncStreamingClient",
2427
"BeginEvent",
2528
"Encoding",
2629
"EventMessage",
@@ -31,6 +34,7 @@
3134
"StreamingClient",
3235
"StreamingClientOptions",
3336
"StreamingError",
37+
"StreamingErrorCodes",
3438
"StreamingEvents",
3539
"StreamingParameters",
3640
"StreamingPiiPolicy",

0 commit comments

Comments
 (0)