From eef24027d7a3cbcabc8fa78eb1ab5efa79443a2b Mon Sep 17 00:00:00 2001
From: STC
Date: Sat, 13 Sep 2025 18:59:09 +0900
Subject: [PATCH 1/2] gpt-realtime

Move the OpenAI Realtime worker and the websocketsclient example to the
"gpt-realtime" model:

* request /v1/realtime?model=gpt-realtime and stop sending the
  "OpenAI-Beta: realtime=v1" header;
* nest the session audio settings under audio.input (format,
  turn_detection, transcription) and audio.output (voice), and add
  type: "realtime" to the session;
* handle the renamed events response.output_audio.delta and
  response.output_audio_transcript.delta/.done;
* add the "Cedar" and "Marin" voices to the assets list.

In the websocketsclient example the old model path is replaced rather
than left in place above the new one, so the options object carries a
single "path" key.
---
 contributed/conversationalAI/assets.js        |  2 ++
 examples/io/tcp/websocketsclient/main.js      |  3 +--
 .../chatAudioIO/workers/openAIRealtime.js     | 36 +++++++++++--------
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/contributed/conversationalAI/assets.js b/contributed/conversationalAI/assets.js
index af652eef41..e199a09e20 100644
--- a/contributed/conversationalAI/assets.js
+++ b/contributed/conversationalAI/assets.js
@@ -256,8 +256,10 @@ const assets = {
 			{ name:"Alloy", id:"alloy" },
 			{ name:"Ash", id:"ash" },
 			{ name:"Ballad", id:"ballad" },
+			{ name:"Cedar", id:"cedar" },
 			{ name:"Coral", id:"coral" },
 			{ name:"Echo", id:"echo" },
+			{ name:"Marin", id:"marin" },
 			{ name:"Sage", id:"sage" },
 			{ name:"Shimmer", id:"shimmer" },
 			{ name:"Verse", id:"verse" },
diff --git a/examples/io/tcp/websocketsclient/main.js b/examples/io/tcp/websocketsclient/main.js
index c9f1be1045..1ec44169bf 100644
--- a/examples/io/tcp/websocketsclient/main.js
+++ b/examples/io/tcp/websocketsclient/main.js
@@ -19,9 +19,8 @@ const ws = new WebSocketClient({
 	...device.network.wss,
 	host: "api.openai.com",
-	path: "/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01",
+	path: "/v1/realtime?model=gpt-realtime",
 	port: 443,
 	headers: [
-		["OpenAI-Beta", "realtime=v1"],
 		["Authorization", `Bearer ${apiKey}`],
 	],
 	onReadable(count, options) {
diff --git a/modules/network/services/chatAudioIO/workers/openAIRealtime.js b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
index 9d2a53574d..6645e587ce 100644
--- a/modules/network/services/chatAudioIO/workers/openAIRealtime.js
+++ b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
@@ -29,9 +29,8 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 	constructor(options) {
 		super(options);
 		this.host = "api.openai.com";
-		this.path = `/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01`;
+		this.path = `/v1/realtime?model=gpt-realtime`;
 		this.headers = [
-			["OpenAI-Beta", "realtime=v1"],
 			["Authorization", `Bearer ${config.openAIKey}`]
 		];
 		this.audioPrefix = audioPrefix;
@@ -47,18 +46,27 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 		});
 		this.session = {
 			instructions,
-			voice,
-			turn_detection: {
-				type: "server_vad",
-				threshold: 0.5,
-				prefix_padding_ms: 300,
-				silence_duration_ms: 500,
-				create_response: true
+			audio: {
+				input: {
+					format: { type: 'audio/pcma' },
+					turn_detection: {
+						type: "server_vad",
+						threshold: 0.5,
+						prefix_padding_ms: 300,
+						silence_duration_ms: 500,
+						create_response: true
+					},
+					transcription: {
+						model: 'whisper-1',
+					}
+				},
+				output: {
+					voice
+				}
 			},
-			input_audio_format: "g711_alaw",
-			input_audio_transcription: { model: 'whisper-1' },
 			tools,
 			tool_choice: "auto",
+			type: "realtime",
 		}
 	}
 	generateId(prefix, length = 21) {
@@ -71,7 +79,7 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 		return `${prefix}${str}`;
 	}
 	isBase64(result, current, name) {
-		return (result?.type == "response.audio.delta") && (name == "delta");
+		return (result?.type == "response.output_audio.delta") && (name == "delta");
 	}
 	sendAudio(message) {
 		const buffer = new Uint8Array(this.inputBuffer, message.offset, message.size);
@@ -124,10 +132,10 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 	'input_audio_buffer.committed'(message) {
 		this.post("listen");
 	}
-	'response.audio_transcript.delta'(message) {
+	'response.output_audio_transcript.delta'(message) {
 		this.postMessage({ id:"receiveOutputText", text:message.delta, more:true });
 	}
-	'response.audio_transcript.done'(message) {
+	'response.output_audio_transcript.done'(message) {
 		this.postMessage({ id:"receiveOutputText", text:"" });
 	}
 	'response.created'(message) {

From 6003aa4b4a7c91f1e0c21c620025f4c840355071 Mon Sep 17 00:00:00 2001
From: STC
Date: Sat, 13 Sep 2025 21:05:34 +0900
Subject: [PATCH 2/2] openAIRealtime: conversation using function calling

Send a "response.create" event right after the existing sendJSON call
shown in the first hunk, so a new response is requested once that
event has been sent.

In the "response.done" handler, finish the audio parser and post
"speak" only when the first output item of the response has type
"message"; responses whose first output item is anything else no
longer trigger "speak". The output array is read with optional
chaining so a response without an output list is skipped instead of
throwing.
---
 .../services/chatAudioIO/workers/openAIRealtime.js    | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/network/services/chatAudioIO/workers/openAIRealtime.js b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
index 6645e587ce..c57d0674fd 100644
--- a/modules/network/services/chatAudioIO/workers/openAIRealtime.js
+++ b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
@@ -98,6 +98,9 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 			},
 			event_id: this.generateId('event_'),
 		});
+		this.sendJSON({
+			type: 'response.create',
+		});
 	}
 	sendText(message) {
 		this.sendJSON({
@@ -143,9 +146,11 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
 		this.postMessage({ id:"receiveOutputText", text:"", more:true });
 	}
 	'response.done'(message) {
-		this.parser.copy(this.silence);
-		this.parser.done();
-		this.post("speak");
+		if(message.response?.output?.[0]?.type === 'message') {
+			this.parser.copy(this.silence);
+			this.parser.done();
+			this.post("speak");
+		}
 	}
 	'response.output_item.done'(message) {
 		const { item } = message;