Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
4e43c82
remove debugging (#2)
hiteshjoshi Apr 18, 2025
46dbd53
Merge pull request #158 from hiteshjoshi/main
dongri May 14, 2025
c9cff36
add newest Realtime voices
Sep 11, 2025
561b198
add dedicated RealtimeSIP client
Sep 11, 2025
2ea3ae1
add accept/reject/refer SIP calls to v1 client
Sep 11, 2025
1021ab5
rename module to reflect endpoint naming
Sep 11, 2025
d6af149
add doc comment
Sep 11, 2025
0e08254
strongly typed realtime model enum
Sep 11, 2025
5ce9e26
use one-off unit struct for "type": "realtime" field
Sep 11, 2025
01484ba
use string response body type, add hangup endpoint, add notes on reject
Sep 11, 2025
8d37893
make OpenAIClient cloneable
Sep 11, 2025
73c50ef
accept anything Into<String> for accept call constructor
Sep 11, 2025
9924018
fix: add new helper method that ignores the response body, use it for
Sep 11, 2025
48a7515
fix: use enum instead of unit struct for `type: "realtime"` due to
Sep 11, 2025
7cca2e6
add webhook payload type to library
Sep 11, 2025
4e1b385
wip: rename session update fields
Sep 11, 2025
c2bcd93
add new GA shape of audio config
Sep 11, 2025
1f605a5
Wrap Session in enum so both regular realtime and transcription only
Sep 11, 2025
12f7157
nevermind, response.create does take something like a session config
Sep 12, 2025
c46f2a6
make ItemType PartialEq for filtering
Sep 12, 2025
56d15ea
partialEq for ItemRole
Sep 12, 2025
ac95318
more partialEqs
Sep 12, 2025
297cebe
fix: make audio format components public
Sep 12, 2025
dac2603
make turn detection support both server (changed) and semantic vad mode
Sep 12, 2025
6deffe0
pass model information to realtime sip client
Sep 12, 2025
80b726f
reorder MaxOutputTokens variants so inf matches first
Sep 12, 2025
8a7b019
experiment fix: treat "inf" as string
Sep 12, 2025
147e549
rename conversation.item.created -> .added
Sep 12, 2025
de0704b
more naming changes
Sep 12, 2025
459505c
rename audio => output_audio
Sep 12, 2025
6db1e23
more audio -> output_audio renaming
Sep 12, 2025
7f3b5b9
add conversation.item.done event
Sep 12, 2025
e31fa95
include all `session` fields in AcceptCallRequest
Sep 15, 2025
9cdcb77
spacing + doc comm
Sep 15, 2025
8686f47
fix: fix call_id casing in RealtimeSipClient
Sep 15, 2025
3d2aec3
add delta item for input audio transcription
Sep 15, 2025
d486710
feature: add MCP tool definitions
Sep 17, 2025
4f6c023
add MCP tool choice type
Sep 17, 2025
4e49490
add response mcp call arguments events
Sep 17, 2025
02c2994
add response.mcp.* events
Sep 17, 2025
447fb64
feat: mcp list tools events
Sep 17, 2025
1b34f42
support MCP-related conversation items
Sep 17, 2025
5ab96f6
clarifying doc comments
Sep 17, 2025
4de1b32
temporary debug eprintln
Sep 17, 2025
7f1e16f
removed useless debug statement
Sep 17, 2025
543749f
supposedly this now works without passing model
Sep 17, 2025
917a3b6
remove model param from realtime endpoint URL
Sep 17, 2025
5f30cc0
fix: remove output_index
Sep 17, 2025
7702309
rename tool_call
Sep 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/realtime/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use tokio_tungstenite::{
MaybeTlsStream, WebSocketStream,
};

pub mod sip;

const WSS_URL: &str = "wss://api.openai.com/v1/realtime";

pub struct RealtimeClient {
Expand Down
181 changes: 181 additions & 0 deletions src/realtime/api/sip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
use serde::{Deserialize, Serialize};

use super::*;

/// Intended for connecting to an already existing Realtime session spawned by accepting an incoming SIP call from e.g. Twilio.
///
/// All three fields are combined by `connect()` to open the WebSocket.
pub struct RealtimeSipClient {
/// Base WebSocket endpoint; `connect()` appends `?call_id=<call_id>` to it.
pub wss_url: String,
/// API key sent as `Authorization: Bearer <api_key>` during the WebSocket handshake.
pub api_key: String,
/// Identifier of the call to attach to, passed as the `call_id` query parameter.
pub call_id: String,
}

impl RealtimeSipClient {
pub fn new(api_key: String, call_id: String) -> Self {
let wss_url = std::env::var("WSS_URL").unwrap_or_else(|_| WSS_URL.to_owned());
Self::new_with_endpoint(wss_url, api_key, call_id)
}

pub fn new_with_endpoint(wss_url: String, api_key: String, call_id: String) -> Self {
Self {
wss_url,
api_key,
call_id,
}
}

pub async fn connect(
&self,
) -> Result<
(
SplitSink<WebSocketStream<MaybeTlsStream<TcpStream>>, Message>,
SplitStream<WebSocketStream<MaybeTlsStream<TcpStream>>>,
),
Box<dyn std::error::Error>,
> {
let url = format!("{}?call_id={}", self.wss_url, self.call_id);
let mut request = url.into_client_request()?;
let api_key = self.api_key.clone();
request
.headers_mut()
.insert("Authorization", format!("Bearer {api_key}").parse()?);
let (ws_stream, _) = connect_async(request).await?;
let (write, read) = ws_stream.split();
Ok((write, read))
}
}

/// Payload of a `realtime.call.incoming` webhook event.
///
/// This is what OpenAI sends to your application when a call hits the SIP endpoint for your project.
/// Exposes some convenience methods for when a call comes from Twilio, which is one of the more common use cases.
/// `openai_call_id()` returns the value you will need for the accept/hangup endpoints.
///
/// # Example
/// The example depends on `axum`, `log` and application-defined types, so it is not compiled as a doctest.
/// ```rust,ignore
/// const INSTRUCTIONS: &str = "You are a helpful assistant.";
/// #[axum::debug_handler]
/// async fn call_webhook(
///     State(mut state): State<AppState>,
///     Json(event): Json<RealtimeCallIncoming>,
/// ) -> impl IntoResponse {
///     let number = event.caller_number();
///     let call_id = event.openai_call_id();
///     let twilio_sid = event.twilio_call_sid();
///     let account_sid = event.twilio_account_sid();
///     log::info!(
///         "Call coming in from {:?} with OpenAi ID {:?}, Twilio SID {:?} / account SID {:?}",
///         number,
///         call_id,
///         twilio_sid,
///         account_sid
///     );
///
///     let accept_call = AcceptCallRequest::new(INSTRUCTIONS, RealtimeModel::GptRealtime);
///
///     match state.openai_client.accept_call(call_id, accept_call).await {
///         Ok(_) => {
///             log::info!("Accepted call {}", call_id);
///         }
///         Err(err) => {
///             log::error!("Failed to accept call {}: {}", call_id, err);
///         }
///     };
///     ()
/// }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeCallIncoming {
/// Identifier of the webhook event itself (not the call).
pub id: String,
/// Always `event`.
pub object: String,
/// Creation time of the event; presumably a Unix timestamp in seconds (TODO confirm against the webhook docs).
pub created_at: i64,
/// This should always be `realtime.call.incoming`.
// Serialized as `type`, which is a reserved word in Rust.
#[serde(rename = "type")]
pub event_type: String,
/// Contains the actual unique data per call. Look for `call_id` here or call `openai_call_id()`.
pub data: RealTimeCallIncomingData,
}

/// Per-call data carried in the `data` field of a `RealtimeCallIncoming` event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealTimeCallIncomingData {
/// Call ID, also available through `RealtimeCallIncoming::openai_call_id()`.
pub call_id: String,
/// SIP headers delivered with the event, as name/value pairs.
pub sip_headers: Vec<SipHeader>,
}

/// A single SIP header expressed as a name/value pair.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SipHeader {
/// Header field name, e.g. `From` or `X-Twilio-CallSid`.
pub name: String,
/// Raw, unparsed header value.
pub value: String,
}

impl RealtimeCallIncoming {
    /// Returns the call ID from the event data.
    pub fn openai_call_id(&self) -> &str {
        &self.data.call_id
    }

    /// Extracts the caller's phone number from the `From` SIP header.
    ///
    /// The header is expected to look like
    /// `"+48123123123" <sip:user@host:5060>;tag=...`; the text between the first
    /// pair of double quotes is returned.
    ///
    /// Returns `None` when there is no `From` header or when its value does not
    /// contain a pair of double quotes.
    pub fn caller_number(&self) -> Option<String> {
        let from = self.get_sip_header("From")?;
        // Both quotes must be present; a lone opening quote yields `None`.
        let (_, after_open_quote) = from.split_once('"')?;
        let (quoted, _) = after_open_quote.split_once('"')?;
        Some(quoted.to_owned())
    }

    /// Returns the Twilio Call SID from the `X-Twilio-CallSid` SIP header, if present.
    pub fn twilio_call_sid(&self) -> Option<&str> {
        self.get_sip_header("X-Twilio-CallSid")
    }

    /// Returns the Twilio Account SID from the `X-Twilio-AccountSid` SIP header, if present.
    pub fn twilio_account_sid(&self) -> Option<&str> {
        self.get_sip_header("X-Twilio-AccountSid")
    }

    /// Returns the value of the first SIP header whose name matches `name`.
    ///
    /// The comparison ignores ASCII case because SIP header field names are
    /// case-insensitive, so `from` and `From` refer to the same header.
    /// Returns `None` when no header matches.
    pub fn get_sip_header(&self, name: &str) -> Option<&str> {
        self.data
            .sip_headers
            .iter()
            .find(|header| header.name.eq_ignore_ascii_case(name))
            .map(|header| header.value.as_str())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes a captured Twilio-originated webhook payload and checks every accessor.
    #[test]
    fn test_parse_twilio_event() {
        let payload = r#"{"id": "evt_68bc6828707881908be189456b84cc07", "object": "event", "created_at": 1757177896, "type": "realtime.call.incoming", "data": {"call_id": "rtc_c5b6f97fe96f4c809b78916a9ac15748", "sip_headers": [{"name": "From", "value": "\"+48123123123\" <sip:[email protected]:5060>;tag=82568196_c3356d0b_03f1232a-01cf-4a4a-af25-bac077219d08"}, {"name": "X-Twilio-CallSid", "value": "CA080dd4bebc0320639d7ae33b82e80481"}, {"name": "X-Twilio-AccountSid", "value": "fake_data"}]}}"#;

        let parsed = serde_json::from_str::<RealtimeCallIncoming>(payload).unwrap();

        // Call ID comes straight from `data.call_id`.
        assert_eq!(
            parsed.openai_call_id(),
            "rtc_c5b6f97fe96f4c809b78916a9ac15748"
        );

        // Caller number is the quoted display name of the `From` header.
        assert_eq!(parsed.caller_number().as_deref(), Some("+48123123123"));

        // Twilio identifiers are read from their dedicated headers.
        assert_eq!(
            parsed.twilio_call_sid(),
            Some("CA080dd4bebc0320639d7ae33b82e80481")
        );
        assert_eq!(parsed.twilio_account_sid(), Some("fake_data"));
    }
}
4 changes: 2 additions & 2 deletions src/realtime/client_event.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize};
use tokio_tungstenite::tungstenite::Message;

use crate::realtime::types::{Item, Session};
use crate::realtime::types::{Item, RealtimeSession, Session};

#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct SessionUpdate {
Expand Down Expand Up @@ -58,7 +58,7 @@ pub struct ConversationItemDelete {
pub struct ResponseCreate {
#[serde(skip_serializing_if = "Option::is_none")]
pub event_id: Option<String>,
pub response: Option<Session>,
pub response: Option<RealtimeSession>,
}

#[derive(Debug, Serialize, Deserialize, Clone, Default)]
Expand Down
Loading