diff --git a/CHANGELOG.md b/CHANGELOG.md index f423c72..f7794ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [4.33.0] + +- Add streaming parameters to match the Python SDK: + - `voiceFocus` and `voiceFocusThreshold` (replaces the unreleased `noiseSuppressionModel` / `noiseSuppressionThreshold`) + - `continuousPartials` + - `customerSupportAudioCapture` (logs a warning when enabled — records session audio for AssemblyAI support) + - `webhookUrl`, `webhookAuthHeaderName`, `webhookAuthHeaderValue` +- Add `speaker` field to `StreamingWord` + ## [4.20.0] - Add `on_low_language_confidence` property to `LanguageDetectionOptions` diff --git a/package.json b/package.json index 833ae4f..e8331ff 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "assemblyai", - "version": "4.32.1", + "version": "4.33.0", "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.", "engines": { "node": ">=18" diff --git a/src/services/streaming/service.ts b/src/services/streaming/service.ts index 1f0c6ef..1f204be 100644 --- a/src/services/streaming/service.ts +++ b/src/services/streaming/service.ts @@ -180,20 +180,74 @@ export class StreamingTranscriber { searchParams.set("max_speakers", this.params.maxSpeakers.toString()); } - if (this.params.noiseSuppressionModel) { + if (this.params.voiceFocus) { + searchParams.set("voice_focus", this.params.voiceFocus); + } + + if (this.params.voiceFocusThreshold !== undefined) { + searchParams.set( + "voice_focus_threshold", + this.params.voiceFocusThreshold.toString(), + ); + } + + if (this.params.continuousPartials !== undefined) { + searchParams.set( + "continuous_partials", + this.params.continuousPartials.toString(), + ); + } + + if (this.params.customerSupportAudioCapture) { + console.warn( + "`customerSupportAudioCapture=true` will record session audio. Only enable this when explicitly coordinating with AssemblyAI support.", + ); + searchParams.set( + "customer_support_audio_capture", + this.params.customerSupportAudioCapture.toString(), + ); + } + + if (this.params.webhookUrl) { + searchParams.set("webhook_url", this.params.webhookUrl); + } + + if (this.params.webhookAuthHeaderName) { + searchParams.set( + "webhook_auth_header_name", + this.params.webhookAuthHeaderName, + ); + } + + if (this.params.webhookAuthHeaderValue) { searchParams.set( - "noise_suppression_model", - this.params.noiseSuppressionModel, + "webhook_auth_header_value", + this.params.webhookAuthHeaderValue, ); } - if (this.params.noiseSuppressionThreshold !== undefined) { + if (this.params.includePartialTurns !== undefined) { searchParams.set( - "noise_suppression_threshold", - this.params.noiseSuppressionThreshold.toString(), + "include_partial_turns", + this.params.includePartialTurns.toString(), ); } + if (this.params.redactPii !== undefined) { + searchParams.set("redact_pii", this.params.redactPii.toString()); + } + + if (this.params.redactPiiPolicies !== undefined) { + searchParams.set( + "redact_pii_policies", + JSON.stringify(this.params.redactPiiPolicies), + ); + } + + if (this.params.redactPiiSub !== undefined) { + searchParams.set("redact_pii_sub", this.params.redactPiiSub); + } + if (this.params.llmGateway !== undefined) { searchParams.set("llm_gateway", JSON.stringify(this.params.llmGateway)); } diff --git a/src/types/streaming/index.ts b/src/types/streaming/index.ts index eccb8dc..7c0ec00 100644 --- a/src/types/streaming/index.ts +++ b/src/types/streaming/index.ts @@ -36,9 +36,18 @@ export type StreamingTranscriberParams = { inactivityTimeout?: number; speakerLabels?: boolean; maxSpeakers?: number; - noiseSuppressionModel?: NoiseSuppressionModel; - noiseSuppressionThreshold?: number; + voiceFocus?: VoiceFocusModel; + voiceFocusThreshold?: number; + continuousPartials?: boolean; + customerSupportAudioCapture?: boolean; + includePartialTurns?: boolean; + redactPii?: boolean; + redactPiiPolicies?: StreamingPiiPolicy[]; + redactPiiSub?: StreamingPiiSubstitution; llmGateway?: LLMGatewayConfig; + webhookUrl?: string; + webhookAuthHeaderName?: string; + webhookAuthHeaderValue?: string; }; export type StreamingEvents = @@ -69,7 +78,58 @@ export type StreamingSpeechModel = export type StreamingDomain = "medical-v1"; -export type NoiseSuppressionModel = "near-field" | "far-field"; +export type VoiceFocusModel = "near-field" | "far-field"; + +export type StreamingPiiSubstitution = "hash" | "entity_name"; + +export type StreamingPiiPolicy = + | "account_number" + | "banking_information" + | "blood_type" + | "credit_card_number" + | "credit_card_expiration" + | "credit_card_cvv" + | "date" + | "date_interval" + | "date_of_birth" + | "drivers_license" + | "drug" + | "duration" + | "email_address" + | "event" + | "filename" + | "gender_sexuality" + | "gender" + | "healthcare_number" + | "injury" + | "ip_address" + | "language" + | "location" + | "marital_status" + | "medical_condition" + | "medical_process" + | "money_amount" + | "nationality" + | "number_sequence" + | "passport_number" + | "password" + | "person_age" + | "person_name" + | "phone_number" + | "physical_attribute" + | "political_affiliation" + | "occupation" + | "organization" + | "organization_medical_facility" + | "religion" + | "sexuality" + | "statistics" + | "time" + | "url" + | "us_social_security_number" + | "username" + | "vehicle_id" + | "zodiac_sign"; export type StreamingTokenParams = { expires_in_seconds: number; @@ -112,6 +172,7 @@ export type StreamingWord = { confidence: number; text: string; word_is_final: boolean; + speaker?: string; }; export type TerminationEvent = { diff --git a/tests/unit/streaming.test.ts b/tests/unit/streaming.test.ts index 4c40204..29e87bb 100644 --- a/tests/unit/streaming.test.ts +++ b/tests/unit/streaming.test.ts @@ -145,19 +145,47 @@ describe("streaming", () => { ); }); - it("should include noise_suppression_model and noise_suppression_threshold in connection URL", async () => { + it("should include redact_pii params and include_partial_turns in connection URL", async () => { await cleanup(); WS.clean(); - const wsUrl = `${websocketBaseUrl}?token=123&sample_rate=16000&speech_model=universal-streaming-english&noise_suppression_model=near-field&noise_suppression_threshold=0.5`; + const policies = ["email_address", "phone_number"] as const; + const wsUrl = + `${websocketBaseUrl}?token=123&sample_rate=16000` + + `&speech_model=universal-streaming-english` + + `&include_partial_turns=false` + + `&redact_pii=true` + + `&redact_pii_policies=${encodeURIComponent(JSON.stringify(policies))}` + + `&redact_pii_sub=entity_name`; server = new WS(wsUrl); rt = new StreamingTranscriber({ websocketBaseUrl, token: "123", sampleRate: 16_000, speechModel: "universal-streaming-english", - noiseSuppressionModel: "near-field", - noiseSuppressionThreshold: 0.5, + includePartialTurns: false, + redactPii: true, + redactPiiPolicies: [...policies], + redactPiiSub: "entity_name", + }); + onOpen = jest.fn(); + rt.on("open", onOpen); + await connect(rt, server); + }); + + it("should include voice_focus and voice_focus_threshold in connection URL", async () => { + await cleanup(); + WS.clean(); + + const wsUrl = `${websocketBaseUrl}?token=123&sample_rate=16000&speech_model=universal-streaming-english&voice_focus=near-field&voice_focus_threshold=0.5`; + server = new WS(wsUrl); + rt = new StreamingTranscriber({ + websocketBaseUrl, + token: "123", + sampleRate: 16_000, + speechModel: "universal-streaming-english", + voiceFocus: "near-field", + voiceFocusThreshold: 0.5, }); onOpen = jest.fn(); rt.on("open", onOpen);