|
13 | 13 |
|
14 | 14 | from assemblyai.streaming.v3 import ( |
15 | 15 | NoiseSuppressionModel, |
| 16 | + SpeakerRevisionEvent, |
16 | 17 | SpeechModel, |
17 | 18 | SpeechStartedEvent, |
18 | 19 | StreamingClient, |
@@ -911,6 +912,88 @@ def test_turn_event_with_word_speakers(): |
911 | 912 | assert [w.speaker for w in event.words] == ["A", "B"] |
912 | 913 |
|
913 | 914 |
|
def test_speaker_revision_event_parses():
    """Parse a SpeakerRevision payload and check its turn-level and per-word speakers."""
    # Given: a SpeakerRevision payload as emitted by the server (revision words
    # use the same Word schema as Turn — start/end/confidence/text/word_is_final/speaker)
    hello_word = {
        "start": 1000,
        "end": 1200,
        "confidence": 0.9,
        "text": "hello",
        "word_is_final": True,
        "speaker": "B",
    }
    world_word = {
        "start": 1210,
        "end": 1400,
        "confidence": 0.88,
        "text": "world",
        "word_is_final": True,
        "speaker": "A",
    }
    payload = {
        "type": "SpeakerRevision",
        "turn_order": 3,
        "speaker_label": "B",
        "words": [hello_word, world_word],
    }

    # When: parsed
    revision = SpeakerRevisionEvent.parse_obj(payload)

    # Then: the revision carries the corrected per-word and turn-level speakers
    assert revision.type == "SpeakerRevision"
    assert revision.turn_order == 3
    assert revision.speaker_label == "B"
    word_speakers = [word.speaker for word in revision.words]
    assert word_speakers == ["B", "A"]
| 950 | + |
| 951 | + |
def test_speaker_revision_event_dispatched_to_handler(mocker: MockFixture):
    """A SpeakerRevision frame read by the client reaches the registered handler."""
    # Given: a SpeakerRevision frame on the wire and a handler registered
    revised_word = {
        "start": 500,
        "end": 700,
        "confidence": 0.95,
        "text": "yes",
        "word_is_final": True,
        "speaker": "A",
    }
    frame = json.dumps(
        {
            "type": "SpeakerRevision",
            "turn_order": 5,
            "speaker_label": "A",
            "words": [revised_word],
        }
    )
    # The fake socket replays the single scripted frame to the client.
    mocker.patch(
        "assemblyai.streaming.v3.client.websocket_connect",
        return_value=_FakeWebSocket(recv_script=[frame]),
    )

    captured = []

    def _record(_c, event):
        # Collect every event the client hands to this handler.
        captured.append(event)

    options = StreamingClientOptions(api_key="test", api_host="api.example.com")
    client = StreamingClient(options)
    client.on(StreamingEvents.SpeakerRevision, _record)

    # When: the client reads the frame
    client.connect(_default_params())
    # Poll for up to two seconds so the test does not hang if nothing arrives.
    give_up_at = time.monotonic() + 2.0
    while not captured and time.monotonic() < give_up_at:
        time.sleep(0.02)
    client.disconnect(terminate=False)

    # Then: the handler is invoked with a parsed SpeakerRevisionEvent
    assert len(captured) == 1
    revision = captured[0]
    assert isinstance(revision, SpeakerRevisionEvent)
    assert revision.turn_order == 5
    assert revision.speaker_label == "A"
    assert [word.speaker for word in revision.words] == ["A"]
| 995 | + |
| 996 | + |
914 | 997 | def test_speech_model_required(): |
915 | 998 | """Test that omitting speech_model raises a validation error.""" |
916 | 999 | with pytest.raises(ValidationError): |
|
0 commit comments