Skip to content

Tests for realtime runner #1079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Python File",
"type": "debugpy",
"request": "launch",
"program": "${file}"
}
]
}
29 changes: 21 additions & 8 deletions examples/realtime/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@

import numpy as np

from agents.realtime import RealtimeSession

# Add the current directory to path so we can import ui
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from agents import function_tool
from agents.realtime import RealtimeAgent, RealtimeSession, RealtimeSessionEvent
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSessionEvent

if TYPE_CHECKING:
from .ui import AppUI
Expand Down Expand Up @@ -38,23 +40,34 @@ def get_weather(city: str) -> str:

class Example:
def __init__(self) -> None:
self.session = RealtimeSession(agent)
self.ui = AppUI()
self.ui.connected = asyncio.Event()
self.ui.last_audio_item_id = None
# Set the audio callback
self.ui.set_audio_callback(self.on_audio_recorded)

self.session: RealtimeSession | None = None

async def run(self) -> None:
self.session.add_listener(self.on_event)
await self.session.connect()
self.ui.set_is_connected(True)
await self.ui.run_async()
# Start UI in a separate task instead of waiting for it to complete
ui_task = asyncio.create_task(self.ui.run_async())

# Set up session immediately without waiting for UI to finish
runner = RealtimeRunner(agent)
async with await runner.run() as session:
self.session = session
self.ui.set_is_connected(True)
async for event in session:
await self.on_event(event)

# Wait for UI task to complete when session ends
await ui_task

async def on_audio_recorded(self, audio_bytes: bytes) -> None:
    """Forward audio recorded by the UI to the realtime session.

    Failures are reported through ``self.ui.log_message`` instead of being
    raised to the caller.

    Args:
        audio_bytes: The recorded audio chunk to pass to
            ``self.session.send_audio``.
    """
    session = self.session
    if session is None:
        # Explicit guard instead of ``assert``: asserts are stripped under
        # ``python -O``, and a bare AssertionError would be logged with an
        # empty message, hiding why the audio was dropped.
        self.ui.log_message("Error sending audio: session is not connected")
        return

    try:
        # Send the audio to the session
        await session.send_audio(audio_bytes)
    except Exception as e:
        self.ui.log_message(f"Error sending audio: {e}")
Expand Down Expand Up @@ -87,8 +100,8 @@ async def on_event(self, event: RealtimeSessionEvent) -> None:
pass
elif event.type == "history_added":
pass
elif event.type == "raw_transport_event":
self.ui.log_message(f"Raw transport event: {event.data}")
elif event.type == "raw_model_event":
self.ui.log_message(f"Raw model event: {event.data}")
else:
self.ui.log_message(f"Unknown event type: {event.type}")
except Exception as e:
Expand Down
50 changes: 34 additions & 16 deletions src/agents/realtime/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
from .config import APIKeyOrKeyFunc
from .config import (
RealtimeAudioFormat,
RealtimeClientMessage,
RealtimeInputAudioTranscriptionConfig,
RealtimeModelName,
RealtimeRunConfig,
RealtimeSessionModelSettings,
RealtimeTurnDetectionConfig,
RealtimeUserInput,
RealtimeUserInputMessage,
RealtimeUserInputText,
)
from .events import (
RealtimeAgentEndEvent,
RealtimeAgentStartEvent,
Expand All @@ -10,42 +21,49 @@
RealtimeHandoffEvent,
RealtimeHistoryAdded,
RealtimeHistoryUpdated,
RealtimeRawTransportEvent,
RealtimeRawModelEvent,
RealtimeSessionEvent,
RealtimeToolEnd,
RealtimeToolStart,
)
from .session import RealtimeSession
from .transport import (
RealtimeModelName,
RealtimeSessionTransport,
RealtimeTransportConnectionOptions,
RealtimeTransportListener,
from .model import (
RealtimeModel,
RealtimeModelConfig,
RealtimeModelListener,
)
from .runner import RealtimeRunner
from .session import RealtimeSession

__all__ = [
"RealtimeAgent",
"RealtimeAgentHooks",
"RealtimeRunHooks",
"RealtimeSession",
"RealtimeSessionListener",
"RealtimeSessionListenerFunc",
"APIKeyOrKeyFunc",
"RealtimeRunner",
"RealtimeRunConfig",
"RealtimeSessionModelSettings",
"RealtimeInputAudioTranscriptionConfig",
"RealtimeTurnDetectionConfig",
"RealtimeAudioFormat",
"RealtimeClientMessage",
"RealtimeUserInput",
"RealtimeUserInputMessage",
"RealtimeUserInputText",
"RealtimeModelName",
"RealtimeSessionTransport",
"RealtimeTransportListener",
"RealtimeTransportConnectionOptions",
"RealtimeModel",
"RealtimeModelListener",
"RealtimeModelConfig",
"RealtimeSessionEvent",
"RealtimeAgentStartEvent",
"RealtimeAgentEndEvent",
"RealtimeHandoffEvent",
"RealtimeToolStart",
"RealtimeToolEnd",
"RealtimeRawTransportEvent",
"RealtimeRawModelEvent",
"RealtimeAudioEnd",
"RealtimeAudio",
"RealtimeAudioInterrupted",
"RealtimeError",
"RealtimeHistoryUpdated",
"RealtimeHistoryAdded",
"RealtimeSession",
]
51 changes: 26 additions & 25 deletions src/agents/realtime/config.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,44 @@
from __future__ import annotations

import inspect
from typing import (
Any,
Callable,
Literal,
Union,
)

from typing_extensions import NotRequired, TypeAlias, TypedDict

from ..model_settings import ToolChoice
from ..tool import FunctionTool
from ..util._types import MaybeAwaitable
from ..tool import Tool

# Type alias for realtime model identifiers: the Literal enumerates the model
# names listed here, and the union with `str` means any other string also
# type-checks as a model name.
RealtimeModelName: TypeAlias = Union[
Literal[
"gpt-4o-realtime-preview",
"gpt-4o-mini-realtime-preview",
"gpt-4o-realtime-preview-2025-06-03",
"gpt-4o-realtime-preview-2024-12-17",
"gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-mini-realtime-preview-2024-12-17",
],
str,
]
"""The name of a realtime model."""


# TypedDict for a client message: `type` is the only required key, while
# `other_data` is optional (NotRequired) and holds an arbitrary string-keyed dict.
class RealtimeClientMessage(TypedDict):
type: str # explicitly required
other_data: NotRequired[dict[str, Any]]


class UserInputText(TypedDict):
class RealtimeUserInputText(TypedDict):
type: Literal["input_text"]
text: str


class RealtimeUserInputMessage(TypedDict):
type: Literal["message"]
role: Literal["user"]
content: list[UserInputText]
content: list[RealtimeUserInputText]


RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
Expand All @@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
threshold: NotRequired[float]


class RealtimeSessionConfig(TypedDict):
api_key: NotRequired[APIKeyOrKeyFunc]
model: NotRequired[str]
class RealtimeSessionModelSettings(TypedDict):
"""Model settings for a realtime model session."""

model_name: NotRequired[RealtimeModelName]

instructions: NotRequired[str]
modalities: NotRequired[list[Literal["text", "audio"]]]
voice: NotRequired[str]
Expand All @@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
turn_detection: NotRequired[RealtimeTurnDetectionConfig]

tool_choice: NotRequired[ToolChoice]
tools: NotRequired[list[FunctionTool]]


APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
"""Either an API key or a function that returns an API key."""

tools: NotRequired[list[Tool]]

async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
    """Resolve an API key from a literal value or a key-producing callable.

    Args:
        key: ``None``, the key itself as a string, or a zero-argument callable
            whose result is either the key or an awaitable yielding it.

    Returns:
        ``None`` when no key was supplied, otherwise the resolved key.
    """
    # ``None`` and plain strings need no resolution; hand them back unchanged.
    if key is None or isinstance(key, str):
        return key

    produced = key()
    # The callable may be sync or async, so await only when it is required.
    return (await produced) if inspect.isawaitable(produced) else produced
# Run-level configuration TypedDict. The only key defined so far is the
# optional `model_settings`; the TODO notes below list planned additions.
class RealtimeRunConfig(TypedDict):
model_settings: NotRequired[RealtimeSessionModelSettings]

# TODO (rm) Add tracing support
# tracing: NotRequired[RealtimeTracingConfig | None]
# TODO (rm) Add guardrail support
# TODO (rm) Add history audio storage config
20 changes: 11 additions & 9 deletions src/agents/realtime/events.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal, Union

Expand All @@ -7,7 +9,7 @@
from ..tool import Tool
from .agent import RealtimeAgent
from .items import RealtimeItem
from .transport_events import RealtimeTransportAudioEvent, RealtimeTransportEvent
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent


@dataclass
Expand Down Expand Up @@ -93,16 +95,16 @@ class RealtimeToolEnd:


@dataclass
class RealtimeRawTransportEvent:
"""Forwards raw events from the transport layer."""
class RealtimeRawModelEvent:
"""Forwards raw events from the model layer."""

data: RealtimeTransportEvent
"""The raw data from the transport layer."""
data: RealtimeModelEvent
"""The raw data from the model layer."""

info: RealtimeEventInfo
"""Common info for all events, such as the context."""

type: Literal["raw_transport_event"] = "raw_transport_event"
type: Literal["raw_model_event"] = "raw_model_event"


@dataclass
Expand All @@ -119,8 +121,8 @@ class RealtimeAudioEnd:
class RealtimeAudio:
"""Triggered when the agent generates new audio to be played."""

audio: RealtimeTransportAudioEvent
"""The audio event from the transport layer."""
audio: RealtimeModelAudioEvent
"""The audio event from the model layer."""

info: RealtimeEventInfo
"""Common info for all events, such as the context."""
Expand Down Expand Up @@ -187,7 +189,7 @@ class RealtimeHistoryAdded:
RealtimeHandoffEvent,
RealtimeToolStart,
RealtimeToolEnd,
RealtimeRawTransportEvent,
RealtimeRawModelEvent,
RealtimeAudioEnd,
RealtimeAudio,
RealtimeAudioInterrupted,
Expand Down
Loading
Loading