Skip to content

Realtime: input timeout trigger event #1552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/realtime/app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ async def _serialize_event(self, event: RealtimeSessionEvent) -> dict[str, Any]:
}
elif event.type == "error":
base_event["error"] = str(event.error) if hasattr(event, "error") else "Unknown error"
elif event.type == "input_audio_timeout_triggered":
pass
else:
assert_never(event)

Expand Down
3 changes: 3 additions & 0 deletions src/agents/realtime/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ class RealtimeTurnDetectionConfig(TypedDict):
threshold: NotRequired[float]
"""The threshold for voice activity detection."""

idle_timeout_ms: NotRequired[int]
"""Threshold for server-vad to trigger a response if the user is idle for this duration."""


class RealtimeSessionModelSettings(TypedDict):
"""Model settings for a realtime model session."""
Expand Down
11 changes: 11 additions & 0 deletions src/agents/realtime/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ class RealtimeGuardrailTripped:
type: Literal["guardrail_tripped"] = "guardrail_tripped"


@dataclass
class RealtimeInputAudioTimeoutTriggered:
    """Emitted when the model detects a period of inactivity/silence from the user.

    The event carries no payload beyond the common event info; consumers
    identify it by its ``type`` tag.
    """

    info: RealtimeEventInfo
    """Common info for all events, such as the context."""

    type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"


RealtimeSessionEvent: TypeAlias = Union[
RealtimeAgentStartEvent,
RealtimeAgentEndEvent,
Expand All @@ -230,5 +240,6 @@ class RealtimeGuardrailTripped:
RealtimeHistoryUpdated,
RealtimeHistoryAdded,
RealtimeGuardrailTripped,
RealtimeInputAudioTimeoutTriggered,
]
"""An event emitted by the realtime session."""
10 changes: 10 additions & 0 deletions src/agents/realtime/model_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ class RealtimeModelInputAudioTranscriptionCompletedEvent:

type: Literal["input_audio_transcription_completed"] = "input_audio_transcription_completed"

@dataclass
class RealtimeModelInputAudioTimeoutTriggeredEvent:
    """Input audio timeout triggered.

    Model-layer event carrying the fields of the server's input audio timeout
    notification. All three values are passed through as received.
    """

    item_id: str
    """Item ID reported with the timeout."""

    audio_start_ms: int
    """Audio start offset reported by the server (presumably milliseconds, per the name)."""

    audio_end_ms: int
    """Audio end offset reported by the server (presumably milliseconds, per the name)."""

    type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"

@dataclass
class RealtimeModelTranscriptDeltaEvent:
Expand Down Expand Up @@ -174,6 +183,7 @@ class RealtimeModelRawServerEvent:
RealtimeModelAudioEvent,
RealtimeModelAudioInterruptedEvent,
RealtimeModelAudioDoneEvent,
RealtimeModelInputAudioTimeoutTriggeredEvent,
RealtimeModelInputAudioTranscriptionCompletedEvent,
RealtimeModelTranscriptDeltaEvent,
RealtimeModelItemUpdatedEvent,
Expand Down
31 changes: 27 additions & 4 deletions src/agents/realtime/openai_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import json
import os
from datetime import datetime
from typing import Any, Callable, Literal
from typing import Annotated, Any, Callable, Literal, Union

import pydantic
import websockets
Expand Down Expand Up @@ -52,7 +52,7 @@
SessionTracingTracingConfiguration as OpenAISessionTracingConfiguration,
SessionUpdateEvent as OpenAISessionUpdateEvent,
)
from pydantic import TypeAdapter
from pydantic import BaseModel, Field, TypeAdapter
from typing_extensions import assert_never
from websockets.asyncio.client import ClientConnection

Expand Down Expand Up @@ -83,6 +83,7 @@
RealtimeModelErrorEvent,
RealtimeModelEvent,
RealtimeModelExceptionEvent,
RealtimeModelInputAudioTimeoutTriggeredEvent,
RealtimeModelInputAudioTranscriptionCompletedEvent,
RealtimeModelItemDeletedEvent,
RealtimeModelItemUpdatedEvent,
Expand Down Expand Up @@ -128,6 +129,22 @@ async def get_api_key(key: str | Callable[[], MaybeAwaitable[str]] | None) -> st
return os.getenv("OPENAI_API_KEY")


class _InputAudioBufferTimeoutTriggeredEvent(BaseModel):
    """Local schema for the ``input_audio_buffer.timeout_triggered`` server event.

    NOTE(review): presumably defined here because the imported
    ``OpenAIRealtimeServerEvent`` union does not yet include this event type;
    confirm and drop this model once the upstream types cover it.
    """

    # Discriminator value used to select this model when parsing server events.
    type: Literal["input_audio_buffer.timeout_triggered"]
    event_id: str
    audio_start_ms: int
    audio_end_ms: int
    item_id: str

# Every server event this module can parse: the imported OpenAI union plus the
# locally defined timeout event. The payload's "type" field is the discriminator
# used to pick the concrete model during validation.
AllRealtimeServerEvents = Annotated[
    Union[OpenAIRealtimeServerEvent, _InputAudioBufferTimeoutTriggeredEvent],
    Field(discriminator="type"),
]


class OpenAIRealtimeWebSocketModel(RealtimeModel):
"""A model that uses OpenAI's WebSocket API."""

Expand Down Expand Up @@ -462,8 +479,8 @@ async def _handle_ws_event(self, event: dict[str, Any]):
try:
if "previous_item_id" in event and event["previous_item_id"] is None:
event["previous_item_id"] = "" # TODO (rm) remove
parsed: OpenAIRealtimeServerEvent = TypeAdapter(
OpenAIRealtimeServerEvent
parsed: AllRealtimeServerEvents = TypeAdapter(
AllRealtimeServerEvents
).validate_python(event)
except pydantic.ValidationError as e:
logger.error(f"Failed to validate server event: {event}", exc_info=True)
Expand Down Expand Up @@ -554,6 +571,12 @@ async def _handle_ws_event(self, event: dict[str, Any]):
or parsed.type == "response.output_item.done"
):
await self._handle_output_item(parsed.item)
elif parsed.type == "input_audio_buffer.timeout_triggered":
await self._emit_event(RealtimeModelInputAudioTimeoutTriggeredEvent(
item_id=parsed.item_id,
audio_start_ms=parsed.audio_start_ms,
audio_end_ms=parsed.audio_end_ms,
))

def _update_created_session(self, session: OpenAISessionObject) -> None:
self._created_session = session
Expand Down
7 changes: 7 additions & 0 deletions src/agents/realtime/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
RealtimeHandoffEvent,
RealtimeHistoryAdded,
RealtimeHistoryUpdated,
RealtimeInputAudioTimeoutTriggered,
RealtimeRawModelEvent,
RealtimeSessionEvent,
RealtimeToolEnd,
Expand Down Expand Up @@ -227,6 +228,12 @@ async def on_event(self, event: RealtimeModelEvent) -> None:
await self._put_event(
RealtimeHistoryUpdated(info=self._event_info, history=self._history)
)
elif event.type == "input_audio_timeout_triggered":
await self._put_event(
RealtimeInputAudioTimeoutTriggered(
info=self._event_info,
)
)
elif event.type == "transcript_delta":
# Accumulate transcript text for guardrail debouncing per item_id
item_id = event.item_id
Expand Down