
Commit 31af63e

Merge branch 'main' into enhancement/add-streaming-inner-events
2 parents: a41d410 + dff0548

19 files changed: +502, -70 lines

examples/realtime/demo.py

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,8 @@ async def _on_event(self, event: RealtimeSessionEvent) -> None:
             self.ui.add_transcript("Audio ended")
         elif event.type == "audio":
             np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
-            self.ui.play_audio(np_audio)
+            # Play audio in a separate thread to avoid blocking the event loop
+            await asyncio.to_thread(self.ui.play_audio, np_audio)
         elif event.type == "audio_interrupted":
             self.ui.add_transcript("Audio interrupted")
         elif event.type == "error":
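
Note: the one-line change above moves the blocking `play_audio` call off the asyncio event loop via `asyncio.to_thread`. A minimal, self-contained sketch of that pattern; the `slow_playback` helper and timings are invented stand-ins for the blocking audio write:

```python
import asyncio
import time


def slow_playback(label: str) -> None:
    # Stand-in for a blocking audio write (e.g. writing to a sounddevice stream).
    time.sleep(0.5)
    print(f"finished {label}")


async def main() -> None:
    # Run the blocking call on a worker thread so the loop keeps servicing events.
    playback = asyncio.create_task(asyncio.to_thread(slow_playback, "chunk-1"))
    await asyncio.sleep(0.1)  # other coroutines still get scheduled meanwhile
    print("event loop still responsive")
    await playback


asyncio.run(main())
```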

examples/realtime/no_ui_demo.py

Lines changed: 82 additions & 7 deletions
@@ -1,5 +1,8 @@
 import asyncio
+import queue
 import sys
+import threading
+from typing import Any
 
 import numpy as np
 import sounddevice as sd
@@ -46,14 +49,77 @@ def __init__(self) -> None:
         self.audio_player: sd.OutputStream | None = None
         self.recording = False
 
+        # Audio output state for callback system
+        self.output_queue: queue.Queue[Any] = queue.Queue(maxsize=10)  # Buffer more chunks
+        self.interrupt_event = threading.Event()
+        self.current_audio_chunk: np.ndarray | None = None  # type: ignore
+        self.chunk_position = 0
+
+    def _output_callback(self, outdata, frames: int, time, status) -> None:
+        """Callback for audio output - handles continuous audio stream from server."""
+        if status:
+            print(f"Output callback status: {status}")
+
+        # Check if we should clear the queue due to interrupt
+        if self.interrupt_event.is_set():
+            # Clear the queue and current chunk state
+            while not self.output_queue.empty():
+                try:
+                    self.output_queue.get_nowait()
+                except queue.Empty:
+                    break
+            self.current_audio_chunk = None
+            self.chunk_position = 0
+            self.interrupt_event.clear()
+            outdata.fill(0)
+            return
+
+        # Fill output buffer from queue and current chunk
+        outdata.fill(0)  # Start with silence
+        samples_filled = 0
+
+        while samples_filled < len(outdata):
+            # If we don't have a current chunk, try to get one from queue
+            if self.current_audio_chunk is None:
+                try:
+                    self.current_audio_chunk = self.output_queue.get_nowait()
+                    self.chunk_position = 0
+                except queue.Empty:
+                    # No more audio data available - this causes choppiness
+                    # Uncomment next line to debug underruns:
+                    # print(f"Audio underrun: {samples_filled}/{len(outdata)} samples filled")
+                    break
+
+            # Copy data from current chunk to output buffer
+            remaining_output = len(outdata) - samples_filled
+            remaining_chunk = len(self.current_audio_chunk) - self.chunk_position
+            samples_to_copy = min(remaining_output, remaining_chunk)
+
+            if samples_to_copy > 0:
+                chunk_data = self.current_audio_chunk[
+                    self.chunk_position : self.chunk_position + samples_to_copy
+                ]
+                # More efficient: direct assignment for mono audio instead of reshape
+                outdata[samples_filled : samples_filled + samples_to_copy, 0] = chunk_data
+                samples_filled += samples_to_copy
+                self.chunk_position += samples_to_copy
+
+            # If we've used up the entire chunk, reset for next iteration
+            if self.chunk_position >= len(self.current_audio_chunk):
+                self.current_audio_chunk = None
+                self.chunk_position = 0
+
     async def run(self) -> None:
         print("Connecting, may take a few seconds...")
 
-        # Initialize audio player
+        # Initialize audio player with callback
+        chunk_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)
         self.audio_player = sd.OutputStream(
             channels=CHANNELS,
             samplerate=SAMPLE_RATE,
             dtype=FORMAT,
+            callback=self._output_callback,
+            blocksize=chunk_size,  # Match our chunk timing for better alignment
         )
         self.audio_player.start()
 
@@ -146,15 +212,24 @@ async def _on_event(self, event: RealtimeSessionEvent) -> None:
         elif event.type == "audio_end":
             print("Audio ended")
         elif event.type == "audio":
-            # Play audio through speakers
+            # Enqueue audio for callback-based playback
             np_audio = np.frombuffer(event.audio.data, dtype=np.int16)
-            if self.audio_player:
-                try:
-                    self.audio_player.write(np_audio)
-                except Exception as e:
-                    print(f"Audio playback error: {e}")
+            try:
+                self.output_queue.put_nowait(np_audio)
+            except queue.Full:
+                # Queue is full - only drop if we have significant backlog
+                # This prevents aggressive dropping that could cause choppiness
+                if self.output_queue.qsize() > 8:  # Keep some buffer
+                    try:
+                        self.output_queue.get_nowait()
+                        self.output_queue.put_nowait(np_audio)
+                    except queue.Empty:
+                        pass
+                # If queue isn't too full, just skip this chunk to avoid blocking
         elif event.type == "audio_interrupted":
             print("Audio interrupted")
+            # Signal the output callback to clear its queue and state
+            self.interrupt_event.set()
         elif event.type == "error":
             print(f"Error: {event.error}")
         elif event.type == "history_updated":
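
Note: this rewrite swaps blocking `OutputStream.write` calls for a PortAudio callback that drains a bounded `queue.Queue`, so the event handler only enqueues chunks and an interrupt merely sets a `threading.Event`. A stripped-down sketch of that producer/consumer shape, independent of the demo class; the sample rate, block size, and synthetic test tone are assumptions for illustration:

```python
import queue
import threading

import numpy as np
import sounddevice as sd

SAMPLE_RATE = 24000  # assumed sample rate; the demo derives its own constants
BLOCKSIZE = SAMPLE_RATE // 10  # 100 ms blocks

audio_queue: queue.Queue[np.ndarray] = queue.Queue(maxsize=10)
interrupt = threading.Event()


def output_callback(outdata, frames, time, status) -> None:
    # Runs on PortAudio's thread; it must never block, only drain the queue.
    outdata.fill(0)  # default to silence (also covers underruns)
    if interrupt.is_set():
        while True:
            try:
                audio_queue.get_nowait()
            except queue.Empty:
                break
        interrupt.clear()
        return
    try:
        chunk = audio_queue.get_nowait()
    except queue.Empty:
        return  # underrun: this block stays silent
    n = min(frames, len(chunk))
    outdata[:n, 0] = chunk[:n]  # mono int16; leftover samples are dropped in this sketch


stream = sd.OutputStream(
    channels=1,
    samplerate=SAMPLE_RATE,
    dtype="int16",
    blocksize=BLOCKSIZE,
    callback=output_callback,
)

with stream:
    # Producer side: enqueue 100 ms chunks of a 440 Hz test tone.
    t = np.arange(SAMPLE_RATE) / SAMPLE_RATE
    tone = (np.sin(2 * np.pi * 440 * t) * 8000).astype(np.int16)
    for chunk in np.split(tone, 10):
        audio_queue.put(chunk)
    sd.sleep(1500)  # let playback finish
```

The demo keeps per-chunk read positions so partial chunks carry over between callbacks; the sketch trades that for brevity.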

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "[email protected]" }]
 dependencies = [
-    "openai>=1.96.0, <2",
+    "openai>=1.96.1, <2",
     "pydantic>=2.10, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",

src/agents/agent.py

Lines changed: 1 addition & 1 deletion
@@ -158,7 +158,7 @@ class Agent(AgentBase, Generic[TContext]):
     usable with OpenAI models, using the Responses API.
     """
 
-    handoffs: list[Agent[Any] | Handoff[TContext]] = field(default_factory=list)
+    handoffs: list[Agent[Any] | Handoff[TContext, Any]] = field(default_factory=list)
     """Handoffs are sub-agents that the agent can delegate to. You can provide a list of handoffs,
     and the agent can choose to delegate to them if relevant. Allows for separation of concerns and
     modularity.

src/agents/guardrail.py

Lines changed: 1 addition & 1 deletion
@@ -244,7 +244,7 @@ def decorator(
         return InputGuardrail(
             guardrail_function=f,
             # If not set, guardrail name uses the function's name by default.
-            name=name if name else f.__name__
+            name=name if name else f.__name__,
         )
 
     if func is not None:

src/agents/handoffs.py

Lines changed: 32 additions & 14 deletions
@@ -18,12 +18,15 @@
 from .util._types import MaybeAwaitable
 
 if TYPE_CHECKING:
-    from .agent import Agent
+    from .agent import Agent, AgentBase
 
 
 # The handoff input type is the type of data passed when the agent is called via a handoff.
 THandoffInput = TypeVar("THandoffInput", default=Any)
 
+# The agent type that the handoff returns
+TAgent = TypeVar("TAgent", bound="AgentBase[Any]", default="Agent[Any]")
+
 OnHandoffWithInput = Callable[[RunContextWrapper[Any], THandoffInput], Any]
 OnHandoffWithoutInput = Callable[[RunContextWrapper[Any]], Any]
 
@@ -52,7 +55,7 @@ class HandoffInputData:
 
 
 @dataclass
-class Handoff(Generic[TContext]):
+class Handoff(Generic[TContext, TAgent]):
     """A handoff is when an agent delegates a task to another agent.
     For example, in a customer support scenario you might have a "triage agent" that determines
     which agent should handle the user's request, and sub-agents that specialize in different
@@ -69,7 +72,7 @@ class Handoff(Generic[TContext]):
     """The JSON schema for the handoff input. Can be empty if the handoff does not take an input.
     """
 
-    on_invoke_handoff: Callable[[RunContextWrapper[Any], str], Awaitable[Agent[TContext]]]
+    on_invoke_handoff: Callable[[RunContextWrapper[Any], str], Awaitable[TAgent]]
     """The function that invokes the handoff. The parameters passed are:
     1. The handoff run context
     2. The arguments from the LLM, as a JSON string. Empty string if input_json_schema is empty.
@@ -100,20 +103,22 @@ class Handoff(Generic[TContext]):
     True, as it increases the likelihood of correct JSON input.
     """
 
-    is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True
+    is_enabled: bool | Callable[[RunContextWrapper[Any], AgentBase[Any]], MaybeAwaitable[bool]] = (
+        True
+    )
     """Whether the handoff is enabled. Either a bool or a Callable that takes the run context and
     agent and returns whether the handoff is enabled. You can use this to dynamically enable/disable
    a handoff based on your context/state."""
 
-    def get_transfer_message(self, agent: Agent[Any]) -> str:
+    def get_transfer_message(self, agent: AgentBase[Any]) -> str:
         return json.dumps({"assistant": agent.name})
 
     @classmethod
-    def default_tool_name(cls, agent: Agent[Any]) -> str:
+    def default_tool_name(cls, agent: AgentBase[Any]) -> str:
         return _transforms.transform_string_function_style(f"transfer_to_{agent.name}")
 
     @classmethod
-    def default_tool_description(cls, agent: Agent[Any]) -> str:
+    def default_tool_description(cls, agent: AgentBase[Any]) -> str:
         return (
             f"Handoff to the {agent.name} agent to handle the request. "
             f"{agent.handoff_description or ''}"
@@ -128,7 +133,7 @@ def handoff(
     tool_description_override: str | None = None,
     input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
     is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True,
-) -> Handoff[TContext]: ...
+) -> Handoff[TContext, Agent[TContext]]: ...
 
 
 @overload
@@ -141,7 +146,7 @@ def handoff(
     tool_name_override: str | None = None,
     input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
     is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True,
-) -> Handoff[TContext]: ...
+) -> Handoff[TContext, Agent[TContext]]: ...
 
 
 @overload
@@ -153,7 +158,7 @@ def handoff(
     tool_name_override: str | None = None,
     input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
     is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True,
-) -> Handoff[TContext]: ...
+) -> Handoff[TContext, Agent[TContext]]: ...
 
 
 def handoff(
@@ -163,8 +168,9 @@ def handoff(
     on_handoff: OnHandoffWithInput[THandoffInput] | OnHandoffWithoutInput | None = None,
     input_type: type[THandoffInput] | None = None,
     input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None,
-    is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True,
-) -> Handoff[TContext]:
+    is_enabled: bool
+    | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True,
+) -> Handoff[TContext, Agent[TContext]]:
     """Create a handoff from an agent.
 
     Args:
@@ -202,7 +208,7 @@ def handoff(
 
     async def _invoke_handoff(
         ctx: RunContextWrapper[Any], input_json: str | None = None
-    ) -> Agent[Any]:
+    ) -> Agent[TContext]:
         if input_type is not None and type_adapter is not None:
             if input_json is None:
                 _error_tracing.attach_error_to_current_span(
@@ -239,12 +245,24 @@ async def _invoke_handoff(
     # If there is a need, we can make this configurable in the future
     input_json_schema = ensure_strict_json_schema(input_json_schema)
 
+    async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool:
+        from .agent import Agent
+
+        assert callable(is_enabled), "is_enabled must be non-null here"
+        assert isinstance(agent_base, Agent), "Can't handoff to a non-Agent"
+        result = is_enabled(ctx, agent_base)
+
+        if inspect.isawaitable(result):
+            return await result
+
+        return result
+
     return Handoff(
         tool_name=tool_name,
         tool_description=tool_description,
         input_json_schema=input_json_schema,
         on_invoke_handoff=_invoke_handoff,
         input_filter=input_filter,
         agent_name=agent.name,
-        is_enabled=is_enabled,
+        is_enabled=_is_enabled if callable(is_enabled) else is_enabled,
     )
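
Note: `Handoff` is now generic over both the context and the target agent type, so `handoff()` returns `Handoff[TContext, Agent[TContext]]`, and a callable `is_enabled` gets wrapped in the async `_is_enabled` shim above. A hedged usage sketch; the agent names and the context flag are invented for illustration:

```python
from dataclasses import dataclass

from agents import Agent, RunContextWrapper, handoff


@dataclass
class SupportContext:
    billing_enabled: bool = True


billing_agent = Agent[SupportContext](name="Billing agent")


def billing_allowed(
    ctx: RunContextWrapper[SupportContext], agent: Agent[SupportContext]
) -> bool:
    # Enable or disable the handoff dynamically from run context/state.
    return ctx.context.billing_enabled


triage_agent = Agent[SupportContext](
    name="Triage agent",
    # handoff(...) now yields Handoff[SupportContext, Agent[SupportContext]];
    # plain Agent entries remain valid per Agent.handoffs' updated type.
    handoffs=[handoff(billing_agent, is_enabled=billing_allowed)],
)
```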

src/agents/models/chatcmpl_converter.py

Lines changed: 1 addition & 1 deletion
@@ -484,7 +484,7 @@ def tool_to_openai(cls, tool: Tool) -> ChatCompletionToolParam:
         )
 
     @classmethod
-    def convert_handoff_tool(cls, handoff: Handoff[Any]) -> ChatCompletionToolParam:
+    def convert_handoff_tool(cls, handoff: Handoff[Any, Any]) -> ChatCompletionToolParam:
         return {
             "type": "function",
             "function": {

src/agents/models/openai_responses.py

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ def get_response_format(
     def convert_tools(
         cls,
         tools: list[Tool],
-        handoffs: list[Handoff[Any]],
+        handoffs: list[Handoff[Any, Any]],
     ) -> ConvertedTools:
         converted_tools: list[ToolParam] = []
         includes: list[ResponseIncludable] = []

src/agents/realtime/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -30,6 +30,7 @@
     RealtimeToolEnd,
     RealtimeToolStart,
 )
+from .handoffs import realtime_handoff
 from .items import (
     AssistantMessageItem,
     AssistantText,
@@ -92,6 +93,8 @@
     "RealtimeAgentHooks",
     "RealtimeRunHooks",
     "RealtimeRunner",
+    # Handoffs
+    "realtime_handoff",
     # Config
     "RealtimeAudioFormat",
     "RealtimeClientMessage",

src/agents/realtime/agent.py

Lines changed: 10 additions & 1 deletion
@@ -3,10 +3,11 @@
 import dataclasses
 import inspect
 from collections.abc import Awaitable
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any, Callable, Generic, cast
 
 from ..agent import AgentBase
+from ..handoffs import Handoff
 from ..lifecycle import AgentHooksBase, RunHooksBase
 from ..logger import logger
 from ..run_context import RunContextWrapper, TContext
@@ -53,6 +54,14 @@ class RealtimeAgent(AgentBase, Generic[TContext]):
     return a string.
     """
 
+    handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
+        default_factory=list
+    )
+    """Handoffs are sub-agents that the agent can delegate to. You can provide a list of handoffs,
+    and the agent can choose to delegate to them if relevant. Allows for separation of concerns and
+    modularity.
+    """
+
     hooks: RealtimeAgentHooks | None = None
     """A class that receives callbacks on various lifecycle events for this agent.
     """
