diff --git a/README.md b/README.md index 17cd7ca9..1d598cfb 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ Stream chats with the Responses API, transcribe and translate audio with Whisper - [Translate](#translate) - [Transcribe](#transcribe) - [Speech](#speech) + - [Realtime](#realtime) - [Usage](#usage) - [Errors](#errors-1) - [Development](#development) @@ -1657,6 +1658,33 @@ File.binwrite('demo.mp3', response) # => mp3 file that plays: "This is a speech test!" ``` +### Realtime + +The [Realtime API](https://platform.openai.com/docs/guides/realtime) allows you to create a live speech-to-speech session with an OpenAI model. It responds with a session object, plus a `client_secret` key which contains a usable ephemeral API token that can be used to [authenticate browser clients for a WebRTC connection](https://platform.openai.com/docs/guides/realtime#connect-with-webrtc). + +```ruby +response = client.realtime.create(parameters: { model: "gpt-4o-realtime-preview-2024-12-17" }) +puts "ephemeral key: #{response.dig('client_secret', 'value')}" +# => "ephemeral key: ek_abc123" +``` + +Then in the client-side JavaScript application, make a POST request to the Realtime API with the ephemeral key and the SDP offer. + +```js +const OPENAI_REALTIME_URL = 'https://api.openai.com/v1/realtime' +const MODEL = 'gpt-4o-realtime-preview-2024-12-17' + +const response = await fetch(`${OPENAI_REALTIME_URL}?model=${MODEL}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/sdp', + 'Authorization': `Bearer ${ephemeralKey}`, + 'OpenAI-Beta': 'realtime=v1' + }, + body: offer.sdp +}) +``` + ### Usage The Usage API provides information about the cost of various OpenAI services within your organization. 
diff --git a/lib/openai.rb b/lib/openai.rb index 978206f4..907975ef 100644 --- a/lib/openai.rb +++ b/lib/openai.rb @@ -10,6 +10,7 @@ require_relative "openai/assistants" require_relative "openai/threads" require_relative "openai/messages" +require_relative "openai/realtime" require_relative "openai/runs" require_relative "openai/run_steps" require_relative "openai/vector_stores" diff --git a/lib/openai/client.rb b/lib/openai/client.rb index e06e5e8c..473b568e 100644 --- a/lib/openai/client.rb +++ b/lib/openai/client.rb @@ -1,3 +1,4 @@ +# rubocop:disable Metrics/ClassLength module OpenAI class Client include OpenAI::HTTP @@ -92,6 +93,10 @@ def batches @batches ||= OpenAI::Batches.new(client: self) end + def realtime + @realtime ||= OpenAI::Realtime.new(client: self) + end + def moderations(parameters: {}) json_post(path: "/moderations", parameters: parameters) end @@ -132,3 +137,4 @@ def inspect end end end +# rubocop:enable Metrics/ClassLength diff --git a/lib/openai/realtime.rb b/lib/openai/realtime.rb new file mode 100644 index 00000000..0282d3d2 --- /dev/null +++ b/lib/openai/realtime.rb @@ -0,0 +1,19 @@ +module OpenAI + class Realtime + def initialize(client:) + @client = client.beta(realtime: "v1") # opts in via the "OpenAI-Beta: realtime=v1" header + end + + # Create a new Realtime session by POSTing to /realtime/sessions. + # + # This method sets up a new session for real-time voice interaction with an OpenAI model. + # It returns session details that can be used to establish a WebRTC connection. 
+ # + # @param parameters [Hash] parameters for the session (see: https://platform.openai.com/docs/api-reference/realtime-sessions/create) + # @return [Hash] Session details including session ID, ICE servers, and other connection + # information; the README example reads the ephemeral key from "client_secret" + def create(parameters: {}) + @client.json_post(path: "/realtime/sessions", parameters: parameters) + end + end +end diff --git a/spec/fixtures/cassettes/realtime_session_create.yml b/spec/fixtures/cassettes/realtime_session_create.yml new file mode 100644 index 00000000..09842979 --- /dev/null +++ b/spec/fixtures/cassettes/realtime_session_create.yml @@ -0,0 +1,112 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/realtime/sessions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-realtime-preview-2024-12-18"}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Openai-Beta: + - realtime=v1 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 18 Dec 2024 12:35:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Openai-Organization: + - org-123456789 + Openai-Processing-Ms: + - '180' + Openai-Version: + - '2024-12-17' + Strict-Transport-Security: + - max-age=15724800; includeSubDomains + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Remaining-Requests: + - '9998' + X-Ratelimit-Reset-Requests: + - 6ms + X-Request-Id: + - req_987654321fedcba + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com; + HttpOnly; Secure; SameSite=None + Server: + - cloudflare + Cf-Ray: + - 987654321fedcba-IAD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "id": "session_real123abc", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-18", + "expires_at": 
1734626783, + "modalities": [ + "audio", + "text" + ], + "instructions": "", + "voice": "alloy", + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf", + "tools": [], + "ice_servers": [ + { + "urls": ["stun:stun1.example.net"] + }, + { + "urls": ["turn:turn.example.org"], + "username": "user123", + "credential": "password123" + } + ], + "session_id": "session_real123abc", + "audio_input_config": { + "sampling_rate": 16000, + "channels": 1, + "encoding": "opus" + }, + "audio_output_config": { + "sampling_rate": 24000, + "channels": 1, + "encoding": "opus" + } + } + recorded_at: Wed, 18 Dec 2024 12:35:56 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/cassettes/realtime_session_create_with_params.yml b/spec/fixtures/cassettes/realtime_session_create_with_params.yml new file mode 100644 index 00000000..bc57574f --- /dev/null +++ b/spec/fixtures/cassettes/realtime_session_create_with_params.yml @@ -0,0 +1,113 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/realtime/sessions + body: + encoding: UTF-8 + string: '{"model":"gpt-4o-realtime-preview-2024-12-17","voice":"alloy","instructions":"You + are a helpful assistant."}' + headers: + Content-Type: + - application/json + Authorization: + - Bearer + Openai-Beta: + - realtime=v1 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + User-Agent: + - Ruby + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 18 Dec 2024 12:35:56 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Openai-Organization: + - org-123456789 + Openai-Processing-Ms: + - '180' + Openai-Version: + - '2024-12-17' + Strict-Transport-Security: + 
- max-age=15724800; includeSubDomains + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Remaining-Requests: + - '9998' + X-Ratelimit-Reset-Requests: + - 6ms + X-Request-Id: + - req_987654321fedcba + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=def456; path=/; expires=Wed, 18-Dec-24 13:05:56 GMT; domain=.api.openai.com; + HttpOnly; Secure; SameSite=None + Server: + - cloudflare + Cf-Ray: + - 987654321fedcba-IAD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |- + { + "id": "session_real123abc", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "expires_at": 1734626783, + "modalities": [ + "audio", + "text" + ], + "instructions": "You are a helpful assistant.", + "voice": "alloy", + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf", + "tools": [], + "ice_servers": [ + { + "urls": ["stun:stun1.example.net"] + }, + { + "urls": ["turn:turn.example.org"], + "username": "user123", + "credential": "password123" + } + ], + "session_id": "session_real123abc", + "audio_input_config": { + "sampling_rate": 16000, + "channels": 1, + "encoding": "opus" + }, + "audio_output_config": { + "sampling_rate": 24000, + "channels": 1, + "encoding": "opus" + } + } + recorded_at: Wed, 18 Dec 2024 12:35:56 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/openai/client/realtime_spec.rb b/spec/openai/client/realtime_spec.rb new file mode 100644 index 00000000..ad1be929 --- /dev/null +++ b/spec/openai/client/realtime_spec.rb @@ -0,0 +1,31 @@ +RSpec.describe OpenAI::Realtime do + let(:client) { OpenAI::Client.new } + let(:realtime) { client.realtime } + + describe "#create" do + it "uses the specified model" do + model = "gpt-4o-realtime-preview-2024-12-18" + 
VCR.use_cassette("realtime_session_create") do + response = realtime.create(parameters: { model: model }) + expect(response["model"]).to eq(model) + end + end + + context "with additional parameters" do + it "sends all parameters to the API" do + parameters = { + model: "gpt-4o-realtime-preview-2024-12-17", + voice: "alloy", + instructions: "You are a helpful assistant." + } + + VCR.use_cassette("realtime_session_create_with_params") do + response = realtime.create(parameters: parameters) + expect(response["model"]).to eq(parameters[:model]) + expect(response["voice"]).to eq(parameters[:voice]) + expect(response["instructions"]).to eq(parameters[:instructions]) + end + end + end + end +end