diff --git a/app/src/App.tsx b/app/src/App.tsx index e88b437186..120fc7529d 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -21,6 +21,7 @@ import PersistRehydrationScreen from './components/PersistRehydrationScreen'; import SecurityBanner from './components/SecurityBanner'; import GlobalUpsellBanner from './components/upsell/GlobalUpsellBanner'; import AppWalkthrough from './components/walkthrough/AppWalkthrough'; +import { useVoiceSpeak } from './features/human/voice/useVoiceSpeak'; import { MascotFrameProducer } from './features/meet/MascotFrameProducer'; import { useNotchBootSync } from './hooks/useNotchBootSync'; import { I18nProvider } from './lib/i18n/I18nContext'; @@ -155,6 +156,9 @@ function AppShellDesktop() { const location = useLocation(); const navigate = useNavigate(); const { snapshot, isBootstrapping } = useCoreState(); + // Play proactive assistant speech (e.g. spoken approval prompts for + // voice-initiated turns — Phase 4 of #3148) app-wide. + useVoiceSpeak(); const activeAccountId = useAppSelector(state => state.accounts.activeAccountId); // On /accounts, only the agent view keeps the tab bar + its reserved // bottom padding. Any other selected "app" (e.g. 
WhatsApp) takes the diff --git a/app/src/features/human/voice/useVoiceSpeak.test.ts b/app/src/features/human/voice/useVoiceSpeak.test.ts new file mode 100644 index 0000000000..04e21d24d1 --- /dev/null +++ b/app/src/features/human/voice/useVoiceSpeak.test.ts @@ -0,0 +1,67 @@ +import { renderHook } from '@testing-library/react'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { useVoiceSpeak } from './useVoiceSpeak'; + +const hoisted = vi.hoisted(() => ({ + onMock: vi.fn<(event: string, cb: (...args: unknown[]) => void) => void>(), + offMock: vi.fn(), + synthMock: vi.fn(), + playMock: vi.fn(), + stopMock: vi.fn(), +})); + +vi.mock('../../../services/socketService', () => ({ + socketService: { on: hoisted.onMock, off: hoisted.offMock }, +})); +vi.mock('./ttsClient', () => ({ synthesizeSpeech: hoisted.synthMock })); +vi.mock('./audioPlayer', () => ({ playBase64Audio: hoisted.playMock, swallowAudioStop: vi.fn() })); + +/** Grab the `voice:speak` handler the hook registered with socketService. */ +function speakHandler(): (...args: unknown[]) => void { + const call = hoisted.onMock.mock.calls.find(([event]) => event === 'voice:speak'); + if (!call) throw new Error('useVoiceSpeak did not subscribe to voice:speak'); + return call[1]; +} + +describe('useVoiceSpeak', () => { + beforeEach(() => { + vi.clearAllMocks(); + hoisted.synthMock.mockResolvedValue({ + audio_base64: 'AAA=', + audio_mime: 'audio/mpeg', + visemes: [], + }); + hoisted.playMock.mockResolvedValue({ ended: Promise.resolve(), stop: hoisted.stopMock }); + }); + + it('synthesizes and plays the spoken prompt on voice:speak', async () => { + renderHook(() => useVoiceSpeak()); + speakHandler()({ text: 'Post to Slack. Say yes to confirm.', source: 'approval' }); + + await vi.waitFor(() => expect(hoisted.playMock).toHaveBeenCalledTimes(1)); + expect(hoisted.synthMock).toHaveBeenCalledWith('Post to Slack. 
Say yes to confirm.'); + expect(hoisted.playMock).toHaveBeenCalledWith('AAA=', 'audio/mpeg', expect.any(Object)); + }); + + it('ignores an empty/whitespace prompt without synthesizing', async () => { + renderHook(() => useVoiceSpeak()); + speakHandler()({ text: ' ' }); + await Promise.resolve(); + expect(hoisted.synthMock).not.toHaveBeenCalled(); + expect(hoisted.playMock).not.toHaveBeenCalled(); + }); + + it('ignores a malformed payload', async () => { + renderHook(() => useVoiceSpeak()); + speakHandler()({ notText: true }); + await Promise.resolve(); + expect(hoisted.synthMock).not.toHaveBeenCalled(); + }); + + it('unsubscribes and stops playback on unmount', () => { + const { unmount } = renderHook(() => useVoiceSpeak()); + unmount(); + expect(hoisted.offMock).toHaveBeenCalledWith('voice:speak', expect.any(Function)); + }); +}); diff --git a/app/src/features/human/voice/useVoiceSpeak.ts b/app/src/features/human/voice/useVoiceSpeak.ts new file mode 100644 index 0000000000..b94976702e --- /dev/null +++ b/app/src/features/human/voice/useVoiceSpeak.ts @@ -0,0 +1,72 @@ +import debug from 'debug'; +import { useEffect, useRef } from 'react'; + +import { socketService } from '../../../services/socketService'; +import { type PlaybackHandle, playBase64Audio, swallowAudioStop } from './audioPlayer'; +import { synthesizeSpeech } from './ttsClient'; + +const log = debug('human:voice-speak'); + +/** Hard cap on a single spoken prompt, guarding against runaway TTS. */ +const MAX_SPEAK_MS = 20_000; + +/** Payload of the core `voice:speak` socket event (mirrors `SpeakRequest`). */ +interface SpeakPayload { + text: string; + source?: string | null; +} + +function isSpeakPayload(value: unknown): value is SpeakPayload { + return ( + typeof value === 'object' && + value !== null && + typeof (value as { text?: unknown }).text === 'string' + ); +} + +/** + * Play proactive assistant speech requested by the core via `voice:speak`. 
+ * + * Today's only producer is the voice-native approval surface: when a sensitive + * action is parked for approval during a voice-initiated turn, the core asks the + * assistant to speak the confirmation aloud so a hands-free user can answer + * "yes"/"no" by voice (Phase 4 of #3148). Mounted once, app-wide, so the prompt + * is heard even when the mascot view isn't open — it synthesizes through the + * same TTS path the mascot uses and plays the returned audio directly. + */ +export function useVoiceSpeak(): void { + const handleRef = useRef(null); + + useEffect(() => { + const onSpeak = (...args: unknown[]): void => { + const payload = args[0]; + if (!isSpeakPayload(payload)) return; + const text = payload.text.trim(); + if (!text) return; + log('voice:speak source=%s chars=%d', payload.source ?? 'unknown', text.length); + + void (async () => { + try { + const { audio_base64: audioBase64, audio_mime: audioMime } = await synthesizeSpeech(text); + if (!audioBase64) return; + // Stop any in-flight prompt before starting the next one. + handleRef.current?.stop(); + const handle = await playBase64Audio(audioBase64, audioMime || 'audio/mpeg', { + maxDurationMs: MAX_SPEAK_MS, + }); + handleRef.current = handle; + handle.ended.catch(swallowAudioStop); + } catch (err) { + log('voice:speak playback failed: %o', err); + } + })(); + }; + + socketService.on('voice:speak', onSpeak); + return () => { + socketService.off('voice:speak', onSpeak); + handleRef.current?.stop(); + handleRef.current = null; + }; + }, []); +} diff --git a/app/src/pages/Conversations.tsx b/app/src/pages/Conversations.tsx index bc3b3cae46..f7658b925e 100644 --- a/app/src/pages/Conversations.tsx +++ b/app/src/pages/Conversations.tsx @@ -293,7 +293,9 @@ const Conversations = ({ // timer's reference point. const sendingThreadIdRef = useRef(null); // Ref so the mount-time dictation event handler can call the latest send fn. 
- const handleSendMessageRef = useRef<((text?: string) => Promise) | null>(null); + const handleSendMessageRef = useRef< + ((text?: string, opts?: { voice?: boolean }) => Promise) | null + >(null); // Previous inference status for the sending thread; lets the rearm effect // distinguish "status was just cleared (chat_done / chat_error)" from // "status was never set yet (in-flight turn pre-status)". @@ -438,10 +440,12 @@ const Conversations = ({ customEvent.preventDefault(); - // When autoSend is set (hotkey dictation), dispatch the transcript directly - // to the agent without going through the text composer. + // When autoSend is set (hotkey dictation / always-on), dispatch the + // transcript directly to the agent without going through the text + // composer. Tag it voice-initiated so the core speaks any approval prompt + // aloud for sensitive actions (Phase 4 of #3148). if (customEvent.detail?.autoSend) { - void handleSendMessageRef.current?.(text); + void handleSendMessageRef.current?.(text, { voice: true }); return; } @@ -672,7 +676,7 @@ const Conversations = ({ } }; - const handleSendMessage = async (text?: string) => { + const handleSendMessage = async (text?: string, opts?: { voice?: boolean }) => { if (pendingSendRef.current) return; const normalized = text ?? inputValue; @@ -786,6 +790,7 @@ const Conversations = ({ model: CHAT_MODEL_ID, profileId: selectedAgentProfileId, locale: uiLocale, + voice: opts?.voice ?? 
false, }); trackEvent('chat_message_sent'); // Backend accepted the send; lifecycle ('started' → 'streaming') now diff --git a/app/src/pages/__tests__/Conversations.render.test.tsx b/app/src/pages/__tests__/Conversations.render.test.tsx index 6dc3f4fae5..868da96d54 100644 --- a/app/src/pages/__tests__/Conversations.render.test.tsx +++ b/app/src/pages/__tests__/Conversations.render.test.tsx @@ -708,6 +708,7 @@ describe('Conversations — smoke render (#1123 welcome-lock removal)', () => { model: 'reasoning-v1', profileId: 'default', locale: 'en', + voice: false, }); }); @@ -732,6 +733,7 @@ describe('Conversations — smoke render (#1123 welcome-lock removal)', () => { model: 'reasoning-v1', profileId: 'default', locale: 'en', + voice: true, }); }); }); @@ -784,6 +786,7 @@ describe('Conversations — smoke render (#1123 welcome-lock removal)', () => { model: 'reasoning-v1', profileId: 'default', locale: 'en', + voice: false, }); expect(screen.getByRole('button', { name: 'Send message' })).toBeDisabled(); resolveSend?.(); @@ -1149,6 +1152,7 @@ describe('Conversations — smoke render (#1123 welcome-lock removal)', () => { model: 'reasoning-v1', profileId: 'default', locale: 'en', + voice: false, }); }); }); @@ -1224,6 +1228,7 @@ describe('Conversations — smoke render (#1123 welcome-lock removal)', () => { model: 'reasoning-v1', profileId: 'default', locale: 'en', + voice: false, }); }); }); diff --git a/app/src/services/chatService.ts b/app/src/services/chatService.ts index 01198515ef..8dbe7f83f2 100644 --- a/app/src/services/chatService.ts +++ b/app/src/services/chatService.ts @@ -963,6 +963,13 @@ export interface ChatSendParams { * (default) aborts the running turn. */ queueMode?: QueueMode | null; + /** + * `true` when this turn was voice-initiated (dictation / always-on + * listening). The core speaks the approval prompt aloud for sensitive + * actions on voice turns so a hands-free user can answer by voice + * (Phase 4 of #3148). 
Omitted/`false` keeps typed turns visual-only. + */ + voice?: boolean | null; } /** @@ -989,6 +996,7 @@ export async function chatSend(params: ChatSendParams): Promise { profile_id: params.profileId ?? undefined, locale: params.locale ?? undefined, queue_mode: params.queueMode ?? undefined, + voice: params.voice ?? undefined, }, }); } diff --git a/docs/voice-system-actions.md b/docs/voice-system-actions.md index 9c098d6712..8cb8733a8b 100644 --- a/docs/voice-system-actions.md +++ b/docs/voice-system-actions.md @@ -591,14 +591,31 @@ Shipped on the Windows machine (2026-06-02): --- -## Phase 4 — Polish ⏳ Not Started +## Phase 4 — Polish 🔨 In progress > Voice confirmation loop, UI indicator, computer control onboarding. **Planned:** -- TTS confirmation before executing sensitive actions ("Opening Music — confirm?") -- Always-on status indicator (notch pill from PR #3166 will handle this automatically) -- Computer control (`mouse`/`keyboard` tools) toggle in Settings onboarding +- [x] **TTS confirmation before executing sensitive actions** — ✅ Done (voice-native approval; see below) +- [x] Always-on status indicator (notch pill from PR #3166 handles this automatically) +- [ ] Computer control (`mouse`/`keyboard` tools) toggle in Settings onboarding + +### Change 4.1 — Voice-native approval (spoken TTS confirmation) ✅ Done + +**Status:** ✅ Shipped. The existing `ApprovalGate` already classifies sensitive agent tool calls and parks them for a yes/no decision, but the prompt was **visual-only** (the in-app approval card). A hands-free / always-on user looking away from the screen never heard it. This makes the gate **voice-native**: when a sensitive action is parked during a **voice-initiated** turn, the assistant **speaks** the prompt aloud and the user answers by voice. + +**Decisions (agreed up front):** make the *existing* approval gate voice-native (not a parallel voice-fast-path gate), and speak **only for voice-initiated turns** (typed approvals stay visual-only). 
+ +**Fix — reuses the approval gate, the turn-origin label, and the overlay socket-bridge pattern:** +- **Origin flag** — a `voice: bool` on `ApprovalChatContext` (set from a new optional `voice` param on the `channel_web_chat` RPC / `chat:start` socket event), stamped onto `DomainEvent::ApprovalRequested { is_voice }` at publish time in `approval/gate.rs`. The frontend tags **dictation / always-on auto-sends** (`Conversations.tsx` → `chatSend({ voice: true })`); typed turns omit it. +- **Voice approval surface** — `src/openhuman/voice/approval_surface.rs` (new), an `EventHandler` mirroring `telegram/approval_surface.rs`: on `ApprovalRequested { is_voice: true }` it builds a short spoken line (`spoken_prompt`: `"{summary}. Say yes to confirm, or no to cancel."`) and publishes it. Registered at startup alongside the web/telegram surfaces. +- **Speak primitive** — `src/openhuman/voice/speak_bus.rs` (new), a `publish_speak` / `subscribe_speak_events` broadcast mirroring `overlay/bus.rs`; `core/socketio.rs` bridges it to a `voice:speak` Socket.IO event (next to the `overlay:attention` bridge). +- **Frontend playback** — `useVoiceSpeak` (new, mounted app-wide in `AppShellDesktop`) subscribes to `voice:speak` and plays it through the existing TTS pipeline (`synthesizeSpeech` → `playBase64Audio`), so the prompt is heard even when the mascot view isn't open. +- **Spoken answer — no new code:** a voice "yes"/"no" rides the existing transcription → auto-send → `web.rs` ingress yes/no router → `approval_decide`. + +**Tests:** Rust — `spoken_prompt` formatter, the `is_voice` gate (speaks on `true`, **silent on `false`** and on non-approval events, no-speak on empty summary), `speak_bus` round-trip. Frontend (Vitest) — `useVoiceSpeak` synthesizes + plays on `voice:speak`, ignores empty/malformed payloads, unsubscribes on unmount. 
+ +**Known follow-up:** the spoken suffix ("Say yes to confirm") is built server-side in English for v1; localizing it through the i18n system (the spoken text isn't a frontend string) is deferred. --- @@ -652,6 +669,6 @@ From live agent-in-the-loop testing on 2026-06-03 (grounded in `~/.openhuman/log | 2 | Text-based "Hey Tiny" wake word | ✅ Done (interim; gates delivery, strips phrase) | | 3 | Local command router (intent classifier) | ✅ Done & wired (recognized intents run on the ≤500ms local path; Unknown defers to agent) | | 3 | On-device audio wake-word model | ⏳ Not started (text-based match is the interim) | -| 4 | Voice confirmation loop | ⏳ Not started | +| 4 | Voice confirmation loop (spoken TTS approval, Change 4.1) | ✅ Done (voice-native approval gate: speaks the prompt for voice-initiated turns; spoken yes/no) | | 4 | Computer-control onboarding toggle | ⏳ Not started | | 4 | Always-on UI indicator | ✅ Done (notch PR #3166) | diff --git a/src/core/event_bus/events.rs b/src/core/event_bus/events.rs index 596c611bb4..9e53c191af 100644 --- a/src/core/event_bus/events.rs +++ b/src/core/event_bus/events.rs @@ -385,6 +385,12 @@ pub enum DomainEvent { /// Socket.IO client id (room) to surface the approval question to, /// when known. `None` for non-chat callers. client_id: Option, + /// Whether the gated turn was **voice-initiated** (dictation / always-on + /// listening). When `true`, the voice approval surface + /// (`crate::openhuman::voice::approval_surface`) speaks the prompt aloud + /// so a hands-free user can answer by voice. `false` for typed turns — + /// they stay visual-only (the in-app approval card). Phase 4 of #3148. + is_voice: bool, }, /// User decided a pending approval. Published by `approval_decide` /// RPC handler after the gate's parked future resolves. 
diff --git a/src/core/event_bus/events_tests.rs b/src/core/event_bus/events_tests.rs index d05bcb0cb2..02b2e3ea30 100644 --- a/src/core/event_bus/events_tests.rs +++ b/src/core/event_bus/events_tests.rs @@ -525,6 +525,7 @@ fn approval_requested_does_not_surface_session_id() { args_redacted: serde_json::json!({ "tool_slug": "SLACK_SEND" }), thread_id: Some("t-1".to_string()), client_id: Some("c-1".to_string()), + is_voice: false, }; let dbg = format!("{event:?}"); assert!( diff --git a/src/core/jsonrpc.rs b/src/core/jsonrpc.rs index adc1c0c181..c78d055be0 100644 --- a/src/core/jsonrpc.rs +++ b/src/core/jsonrpc.rs @@ -2155,6 +2155,8 @@ pub async fn bootstrap_core_runtime(host_kind: crate::core::types::HostKind) { // frontend → every prompt dies at the TTL. Idempotent (Once-guarded). crate::openhuman::channels::providers::web::register_approval_surface_subscriber(); crate::openhuman::channels::providers::web::register_artifact_surface_subscriber(); + // Speak approval prompts for voice-initiated turns (Phase 4 of #3148). + crate::openhuman::voice::approval_surface::register_voice_approval_surface(); } else { log::error!( "[runtime] approval gate DISABLED (OPENHUMAN_APPROVAL_GATE=0 honored on host={}) — \ diff --git a/src/core/socketio.rs b/src/core/socketio.rs index b55c7d092a..154060466e 100644 --- a/src/core/socketio.rs +++ b/src/core/socketio.rs @@ -287,6 +287,10 @@ struct ChatStartPayload { locale: Option, #[serde(default)] queue_mode: Option, + /// `true` when the turn was voice-initiated (dictation / always-on). Speaks + /// the approval prompt aloud for sensitive actions (Phase 4 of #3148). 
+ #[serde(default)] + voice: Option, } #[derive(Debug, Deserialize)] @@ -438,6 +442,7 @@ pub fn attach_socketio() -> (socketioxide::layer::SocketIoLayer, SocketIo) { payload.profile_id, payload.locale, payload.queue_mode, + payload.voice.unwrap_or(false), ) .await { @@ -549,6 +554,7 @@ pub fn spawn_web_channel_bridge(io: SocketIo) { }); let io_overlay = io.clone(); + let io_speak = io.clone(); let io_notify = io.clone(); let io_transcription = io.clone(); let io_auth = io.clone(); @@ -611,6 +617,36 @@ pub fn spawn_web_channel_bridge(io: SocketIo) { log::debug!("[socketio] overlay attention bridge stopped"); }); + // 3b. Proactive speech requests → broadcast so the UI speaks them via the + // mascot TTS pipeline (Phase 4 of #3148; mirrors the attention bridge). + tokio::spawn(async move { + let mut rx = crate::openhuman::voice::speak_bus::subscribe_speak_events(); + loop { + let request = match rx.recv().await { + Ok(req) => req, + Err(tokio::sync::broadcast::error::RecvError::Lagged(skipped)) => { + log::warn!( + "[socketio] dropped {} voice speak requests due to lag", + skipped + ); + continue; + } + Err(tokio::sync::broadcast::error::RecvError::Closed) => break, + }; + + if let Ok(payload) = serde_json::to_value(&request) { + log::debug!( + "[socketio] broadcast voice:speak source={:?} chars={}", + request.source, + request.text.len() + ); + let _ = io_speak.emit("voice:speak", &payload); + let _ = io_speak.emit("voice_speak", &payload); + } + } + log::debug!("[socketio] voice speak bridge stopped"); + }); + // 4. Core notification events → broadcast to all connected clients so // the in-app notification center picks them up regardless of which // chat session is active. 
Pattern mirrors the overlay attention diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs index 309a568ba5..66b2eca889 100644 --- a/src/openhuman/agent/harness/tool_loop_tests.rs +++ b/src/openhuman/agent/harness/tool_loop_tests.rs @@ -1319,6 +1319,7 @@ async fn auto_approved_external_effect_tool_runs_through_loop_without_parking() crate::openhuman::approval::ApprovalChatContext { thread_id: "t-e2e".into(), client_id: "c-e2e".into(), + voice: false, }, run_tool_call_loop( &provider, diff --git a/src/openhuman/approval/gate.rs b/src/openhuman/approval/gate.rs index a3657a3a9d..7790d7a773 100644 --- a/src/openhuman/approval/gate.rs +++ b/src/openhuman/approval/gate.rs @@ -61,6 +61,10 @@ const DEFAULT_APPROVAL_TTL: Duration = Duration::from_secs(60 * 10); pub struct ApprovalChatContext { pub thread_id: String, pub client_id: String, + /// `true` when the turn was voice-initiated (dictation / always-on). The + /// voice approval surface speaks the prompt aloud only for these turns; + /// typed turns stay visual-only. Phase 4 of #3148. Defaults `false`. + pub voice: bool, } tokio::task_local! { @@ -267,6 +271,8 @@ impl ApprovalGate { let chat_ctx = APPROVAL_CHAT_CONTEXT.try_with(|c| c.clone()).ok(); let chat_thread_id = chat_ctx.as_ref().map(|c| c.thread_id.clone()); let chat_client_id = chat_ctx.as_ref().map(|c| c.client_id.clone()); + // Voice-initiated turns get the prompt spoken aloud (Phase 4). + let chat_is_voice = chat_ctx.as_ref().map(|c| c.voice).unwrap_or(false); // Branch by origin. 
Web chat parks for an in-app approval; external // channel persists an audit row and TTL-denies (no routable approval @@ -432,6 +438,7 @@ impl ApprovalGate { args_redacted, thread_id: chat_thread_id.clone(), client_id: chat_client_id.clone(), + is_voice: chat_is_voice, }); tracing::info!( @@ -674,6 +681,7 @@ mod tests { ApprovalChatContext { thread_id: "t-test".into(), client_id: "c-test".into(), + voice: false, } } @@ -838,6 +846,7 @@ mod tests { let ctx = ApprovalChatContext { thread_id: "thread-42".into(), client_id: "client-1".into(), + voice: false, }; let origin = AgentTurnOrigin::WebChat { thread_id: "thread-42".into(), diff --git a/src/openhuman/channels/bus.rs b/src/openhuman/channels/bus.rs index 613994c616..187d637800 100644 --- a/src/openhuman/channels/bus.rs +++ b/src/openhuman/channels/bus.rs @@ -86,7 +86,7 @@ impl EventHandler for ChannelInboundSubscriber { crate::openhuman::channels::providers::web::subscribe_web_channel_events(); let request_id = match crate::openhuman::channels::providers::web::start_chat( - &client_id, &thread_id, message, None, None, None, None, None, + &client_id, &thread_id, message, None, None, None, None, None, false, ) .await { diff --git a/src/openhuman/channels/providers/telegram/approval_surface_tests.rs b/src/openhuman/channels/providers/telegram/approval_surface_tests.rs index 76ad9d4b46..f026524071 100644 --- a/src/openhuman/channels/providers/telegram/approval_surface_tests.rs +++ b/src/openhuman/channels/providers/telegram/approval_surface_tests.rs @@ -60,6 +60,7 @@ fn approval_event(thread_id: Option<&str>, client_id: Option<&str>) -> DomainEve args_redacted: serde_json::json!({"path": "notes/today.md"}), thread_id: thread_id.map(str::to_string), client_id: client_id.map(str::to_string), + is_voice: false, } } diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index 4a85812525..3c8232daca 100644 --- a/src/openhuman/channels/providers/web.rs +++ 
b/src/openhuman/channels/providers/web.rs @@ -474,6 +474,7 @@ pub(super) async fn set_test_forced_run_chat_task_error(message: Option<&str>) { *slot = message.map(str::to_string); } +#[allow(clippy::too_many_arguments)] pub async fn start_chat( client_id: &str, thread_id: &str, @@ -483,6 +484,7 @@ pub async fn start_chat( profile_id: Option, locale: Option, queue_mode: Option, + voice: bool, ) -> Result { let client_id = client_id.trim().to_string(); let thread_id = thread_id.trim().to_string(); @@ -708,6 +710,7 @@ pub async fn start_chat( let approval_ctx = crate::openhuman::approval::ApprovalChatContext { thread_id: thread_id_task.clone(), client_id: client_id_task.clone(), + voice, }; // Scope the matching `AgentTurnOrigin::WebChat` alongside the chat // context so the approval gate's origin-aware decision tree sees a @@ -901,6 +904,8 @@ fn dispatch_followups(followups: Vec, + /// `true` when the turn was voice-initiated (dictation / always-on + /// listening). Drives the voice-native approval surface — sensitive actions + /// get their confirmation prompt spoken aloud (Phase 4 of #3148). Defaults + /// `false` so typed turns stay visual-only. + #[serde(default)] + voice: bool, } #[derive(Debug, Deserialize)] @@ -1994,6 +2005,7 @@ struct WebCancelParams { thread_id: String, } +#[allow(clippy::too_many_arguments)] pub async fn channel_web_chat( client_id: &str, thread_id: &str, @@ -2003,6 +2015,7 @@ pub async fn channel_web_chat( profile_id: Option, locale: Option, queue_mode: Option, + voice: bool, ) -> Result, String> { let result = start_chat( client_id, @@ -2013,6 +2026,7 @@ pub async fn channel_web_chat( profile_id, locale, queue_mode, + voice, ) .await?; @@ -2163,6 +2177,10 @@ pub fn schemas(function: &str) -> ControllerSchema { "queue_mode", "Queue mode: 'interrupt' (default), 'steer', 'followup', or 'collect'.", ), + optional_bool( + "voice", + "True when the turn was voice-initiated (dictation / always-on). 
Speaks the approval prompt aloud for sensitive actions.", + ), ], outputs: vec![json_output("ack", "Acceptance payload.")], }, @@ -2218,6 +2236,7 @@ fn handle_chat(params: Map) -> ControllerFuture { p.profile_id, p.locale, p.queue_mode, + p.voice, ) .await?, ) @@ -2325,6 +2344,15 @@ fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { } } +fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { FieldSchema { name, diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index d1f087b780..f0eab646a7 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -24,19 +24,21 @@ static FORCED_ERROR_TEST_LOCK: Lazy> = Lazy::new(|| TokioMutex::n #[tokio::test] async fn start_chat_validates_required_fields() { - let err = start_chat("", "thread", "hello", None, None, None, None, None) + let err = start_chat("", "thread", "hello", None, None, None, None, None, false) .await .expect_err("client id should be required"); assert!(err.contains("client_id is required")); - let err = start_chat("client", "", "hello", None, None, None, None, None) + let err = start_chat("client", "", "hello", None, None, None, None, None, false) .await .expect_err("thread id should be required"); assert!(err.contains("thread_id is required")); - let err = start_chat("client", "thread", " ", None, None, None, None, None) - .await - .expect_err("message should be required"); + let err = start_chat( + "client", "thread", " ", None, None, None, None, None, false, + ) + .await + .expect_err("message should be required"); assert!(err.contains("message is required")); } @@ -51,6 +53,7 @@ async fn start_chat_rejects_prompt_injection_payload() { None, None, None, + false, ) 
.await .expect_err("prompt-injection payload should be rejected"); @@ -94,6 +97,7 @@ async fn start_chat_emits_sanitized_chat_error_on_inference_failure() { None, None, None, + false, ) .await .expect("start_chat should accept valid request"); @@ -505,6 +509,7 @@ async fn start_chat_chat_error_event_serializes_structured_fields_to_json_wire() None, None, None, + false, ) .await .expect("start_chat should accept valid request"); @@ -599,6 +604,7 @@ async fn start_chat_emits_structured_rate_limit_metadata_on_chat_error_event() { None, None, None, + false, ) .await .expect("start_chat should accept valid request"); diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 23a5fe9a0b..439d4a04b8 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -1101,6 +1101,8 @@ pub(crate) async fn process_channel_message( let approval_ctx = crate::openhuman::approval::ApprovalChatContext { thread_id: history_key.clone(), client_id: msg.channel.clone(), + // Channel turns (Telegram/Discord/…) are not voice-initiated. + voice: false, }; crate::openhuman::approval::APPROVAL_CHAT_CONTEXT .scope(approval_ctx, agent_call) diff --git a/src/openhuman/channels/runtime/startup.rs b/src/openhuman/channels/runtime/startup.rs index 287bba44fd..4751d06c2f 100644 --- a/src/openhuman/channels/runtime/startup.rs +++ b/src/openhuman/channels/runtime/startup.rs @@ -92,6 +92,9 @@ pub async fn start_channels(mut config: Config) -> Result<()> { // Surface parked ApprovalGate requests as chat messages so the user can // answer yes/no in the thread (chat-native approval, issue #1339). crate::openhuman::channels::providers::web::register_approval_surface_subscriber(); + // Speak the approval prompt aloud for voice-initiated turns (Phase 4 of + // #3148) so a hands-free user can answer yes/no by voice. 
+ crate::openhuman::voice::approval_surface::register_voice_approval_surface(); // Surface generated-artifact lifecycle events (ArtifactReady / // ArtifactFailed) as `artifact_ready` / `artifact_failed` web-channel // events so the frontend ArtifactCard can render in chat (#2779). diff --git a/src/openhuman/tools/impl/system/install_tool.rs b/src/openhuman/tools/impl/system/install_tool.rs index 234da456e5..92552b7ae9 100644 --- a/src/openhuman/tools/impl/system/install_tool.rs +++ b/src/openhuman/tools/impl/system/install_tool.rs @@ -266,6 +266,7 @@ mod tests { ApprovalChatContext { thread_id: "t-test".into(), client_id: "c-test".into(), + voice: false, } } diff --git a/src/openhuman/voice/approval_surface.rs b/src/openhuman/voice/approval_surface.rs new file mode 100644 index 0000000000..05a60296fb --- /dev/null +++ b/src/openhuman/voice/approval_surface.rs @@ -0,0 +1,113 @@ +//! Voice-native approval surface — speak a parked approval prompt aloud. +//! +//! Phase 4 of #3148. The [`ApprovalGate`] already classifies sensitive agent +//! tool calls and parks them for a yes/no decision, but the prompt is +//! visual-only (the in-app approval card). A hands-free / always-on user looking +//! away from the screen never hears it. +//! +//! This subscriber mirrors `channels::providers::telegram::approval_surface`: +//! it watches [`DomainEvent::ApprovalRequested`] and, **only when the turn was +//! voice-initiated** (`is_voice == true`), publishes a [`SpeakRequest`] so the +//! assistant speaks the confirmation aloud. The user answers by voice — the +//! spoken "yes"/"no" rides the existing transcription → auto-send → web.rs +//! ingress yes/no path straight to `approval_decide`, so no answer-side wiring +//! is needed here. +//! +//! Typed-chat approvals (`is_voice == false`) are left untouched — they stay +//! visual-only, per the agreed scope. +//! +//! 
[`ApprovalGate`]: crate::openhuman::approval::ApprovalGate + +use crate::core::event_bus::{subscribe_global, DomainEvent, EventHandler, SubscriptionHandle}; +use crate::openhuman::voice::speak_bus::{publish_speak, SpeakRequest}; +use async_trait::async_trait; +use std::sync::{Arc, OnceLock}; + +const LOG_PREFIX: &str = "[voice-approval]"; + +/// Keeps the subscription alive for the process lifetime. `OnceLock` makes +/// [`register_voice_approval_surface`] idempotent — subsequent calls no-op. +static VOICE_APPROVAL_HANDLE: OnceLock = OnceLock::new(); + +/// Register the voice approval surface so spoken approval prompts fire for +/// voice-initiated turns. Idempotent; safe to call from multiple startup paths. +pub fn register_voice_approval_surface() { + if VOICE_APPROVAL_HANDLE.get().is_some() { + return; + } + match subscribe_global(Arc::new(VoiceApprovalSurfaceSubscriber)) { + Some(handle) => { + let _ = VOICE_APPROVAL_HANDLE.set(handle); + log::info!( + "{LOG_PREFIX} registered voice approval surface (domain=approval) — will speak \ + approval prompts for voice-initiated turns" + ); + } + None => { + log::warn!( + "{LOG_PREFIX} failed to register voice approval surface — bus not initialized" + ); + } + } +} + +/// `SpeakRequest.source` tag for spoken approval prompts. +pub const VOICE_APPROVAL_SOURCE: &str = "approval"; + +/// Render an approval request's redacted `action_summary` into a short spoken +/// confirmation line. Kept as a free function so tests pin the wording without a +/// bus round-trip. Returns `None` for an empty summary — never speak silence. +pub(crate) fn spoken_prompt(action_summary: &str) -> Option { + let summary = action_summary.trim(); + if summary.is_empty() { + return None; + } + // Drop a trailing period so the joined sentence reads cleanly. + let summary = summary.trim_end_matches('.'); + Some(format!("{summary}. Say yes to confirm, or no to cancel.")) +} + +/// Subscriber that speaks approval prompts for voice-initiated turns. 
+pub struct VoiceApprovalSurfaceSubscriber;
+
+#[async_trait]
+impl EventHandler for VoiceApprovalSurfaceSubscriber {
+    /// Name identifying this handler.
+    fn name(&self) -> &str {
+        "voice::approval_surface"
+    }
+
+    /// Domains this handler declares interest in: only `approval`.
+    fn domains(&self) -> Option<&[&str]> {
+        Some(&["approval"])
+    }
+
+    /// Publish a spoken prompt for a voice-initiated `ApprovalRequested` event.
+    /// Any other event, and any approval with `is_voice == false`, is ignored.
+    async fn handle(&self, event: &DomainEvent) {
+        let DomainEvent::ApprovalRequested {
+            request_id,
+            tool_name,
+            action_summary,
+            is_voice,
+            ..
+        } = event
+        else {
+            return;
+        };
+        if !*is_voice {
+            // Typed/visual approval — stays on the in-app card.
+            return;
+        }
+        match spoken_prompt(action_summary) {
+            Some(line) => {
+                tracing::info!(
+                    "{LOG_PREFIX} speaking approval prompt request_id={request_id} tool={tool_name}"
+                );
+                publish_speak(SpeakRequest::new(line).with_source(VOICE_APPROVAL_SOURCE));
+            }
+            None => {
+                tracing::warn!(
+                    "{LOG_PREFIX} voice approval request_id={request_id} tool={tool_name} \
+                     has an empty action_summary — not speaking"
+                );
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+#[path = "approval_surface_tests.rs"]
+mod tests;
diff --git a/src/openhuman/voice/approval_surface_tests.rs b/src/openhuman/voice/approval_surface_tests.rs
new file mode 100644
index 0000000000..23bc909264
--- /dev/null
+++ b/src/openhuman/voice/approval_surface_tests.rs
@@ -0,0 +1,117 @@
+//! Unit tests for the voice approval surface — pure prompt formatting plus the
+//! `is_voice` gate that decides whether to speak.
+
+use super::*;
+use crate::core::event_bus::DomainEvent;
+use crate::openhuman::voice::speak_bus::{subscribe_speak_events, SpeakRequest};
+use tokio::sync::broadcast::error::TryRecvError;
+use tokio::sync::Mutex;
+
+/// The speak bus is a process-global broadcast and every approval prompt shares
+/// the same `VOICE_APPROVAL_SOURCE`, so a parallel test's published prompt would
+/// leak into these receivers. Serialize the bus-touching tests: lock first, then
+/// subscribe, so each runs with an isolated view of the bus.
+static SPEAK_BUS_LOCK: Mutex<()> = Mutex::const_new(());
+
+/// Build an `ApprovalRequested` event with fixed ids, varying only the summary
+/// and the `is_voice` flag under test.
+fn approval_event(action_summary: &str, is_voice: bool) -> DomainEvent {
+    DomainEvent::ApprovalRequested {
+        request_id: "req-1".to_string(),
+        tool_name: "composio".to_string(),
+        action_summary: action_summary.to_string(),
+        args_redacted: serde_json::json!({}),
+        thread_id: Some("thread-1".to_string()),
+        client_id: Some("client-1".to_string()),
+        is_voice,
+    }
+}
+
+/// Drain the speak bus looking for a request with the given source. Returns the
+/// matching request text, or `None` if none arrived (tolerates lagged events
+/// from other tests sharing the process-global bus).
+///
+/// Gives up after 32 receive attempts so a busy bus cannot spin the test.
+fn drain_for_source(
+    rx: &mut tokio::sync::broadcast::Receiver<SpeakRequest>,
+    source: &str,
+) -> Option<String> {
+    for _ in 0..32 {
+        match rx.try_recv() {
+            Ok(req) if req.source.as_deref() == Some(source) => return Some(req.text),
+            Ok(_) => continue,
+            Err(TryRecvError::Lagged(_)) => continue,
+            // Empty or closed: nothing more to read.
+            Err(_) => return None,
+        }
+    }
+    None
+}
+
+// ── spoken_prompt ────────────────────────────────────────────────────────────
+
+#[test]
+fn spoken_prompt_appends_confirmation() {
+    let line = spoken_prompt("Send a message to #general").unwrap();
+    assert_eq!(
+        line,
+        "Send a message to #general. Say yes to confirm, or no to cancel."
+    );
+}
+
+#[test]
+fn spoken_prompt_strips_trailing_period() {
+    let line = spoken_prompt("Delete 3 files.").unwrap();
+    assert_eq!(line, "Delete 3 files. 
Say yes to confirm, or no to cancel.");
+}
+
+#[test]
+fn spoken_prompt_empty_is_none() {
+    assert!(spoken_prompt(" ").is_none());
+}
+
+// ── handle: the is_voice gate ────────────────────────────────────────────────
+
+/// A voice-initiated approval publishes the formatted prompt on the speak bus.
+#[tokio::test]
+async fn speaks_for_voice_initiated_approval() {
+    let _guard = SPEAK_BUS_LOCK.lock().await;
+    let mut speak_rx = subscribe_speak_events();
+    let voice_event = approval_event("Post to Slack", true);
+    VoiceApprovalSurfaceSubscriber.handle(&voice_event).await;
+    let spoken = drain_for_source(&mut speak_rx, VOICE_APPROVAL_SOURCE);
+    assert_eq!(
+        spoken.as_deref(),
+        Some("Post to Slack. Say yes to confirm, or no to cancel.")
+    );
+}
+
+/// A typed approval publishes nothing.
+#[tokio::test]
+async fn silent_for_typed_approval() {
+    let _guard = SPEAK_BUS_LOCK.lock().await;
+    let mut speak_rx = subscribe_speak_events();
+    let typed_event = approval_event("Post to Slack", false);
+    VoiceApprovalSurfaceSubscriber.handle(&typed_event).await;
+    assert_eq!(
+        drain_for_source(&mut speak_rx, VOICE_APPROVAL_SOURCE),
+        None,
+        "typed approvals must stay visual-only"
+    );
+}
+
+/// Events other than `ApprovalRequested` publish nothing.
+#[tokio::test]
+async fn ignores_non_approval_events() {
+    let _guard = SPEAK_BUS_LOCK.lock().await;
+    let mut speak_rx = subscribe_speak_events();
+    let decided = DomainEvent::ApprovalDecided {
+        request_id: "req-1".to_string(),
+        tool_name: "composio".to_string(),
+        decision: "approve_once".to_string(),
+    };
+    VoiceApprovalSurfaceSubscriber.handle(&decided).await;
+    assert_eq!(drain_for_source(&mut speak_rx, VOICE_APPROVAL_SOURCE), None);
+}
+
+/// A voice approval with a blank summary publishes nothing.
+#[tokio::test]
+async fn empty_summary_does_not_speak() {
+    let _guard = SPEAK_BUS_LOCK.lock().await;
+    let mut speak_rx = subscribe_speak_events();
+    let blank_event = approval_event(" ", true);
+    VoiceApprovalSurfaceSubscriber.handle(&blank_event).await;
+    assert_eq!(drain_for_source(&mut speak_rx, VOICE_APPROVAL_SOURCE), None);
+}
diff --git a/src/openhuman/voice/mod.rs b/src/openhuman/voice/mod.rs
index d439c2712d..e60128b2ff 100644
--- a/src/openhuman/voice/mod.rs
+++ b/src/openhuman/voice/mod.rs
@@ -10,6 +10,7 @@
 //! single domain root.
pub mod always_on;
+pub mod approval_surface;
 pub mod audio_capture;
 pub(crate) mod cli;
 pub mod command_router;
@@ -20,6 +21,7 @@
 mod ops;
 pub mod reply_speech;
 mod schemas;
 pub mod server;
+pub mod speak_bus;
 pub mod text_input;
 mod types;
diff --git a/src/openhuman/voice/speak_bus.rs b/src/openhuman/voice/speak_bus.rs
new file mode 100644
index 0000000000..06f794ccec
--- /dev/null
+++ b/src/openhuman/voice/speak_bus.rs
@@ -0,0 +1,120 @@
+//! Broadcast bus for **proactive assistant speech** (core → UI).
+//!
+//! Mirrors `overlay::bus`: a single `tokio::sync::broadcast` channel wrapped in
+//! a `Lazy` static so any core module can ask the assistant to *speak* a line
+//! without threading a sender around. The Socket.IO bridge in
+//! `core::socketio::spawn_web_channel_bridge` subscribes here and forwards every
+//! request to the desktop UI as a `voice:speak` message, which the frontend
+//! plays through the existing TTS pipeline (`openhuman.voice_reply_synthesize`).
+//!
+//! Today's only producer is the voice-native approval surface
+//! (`voice::approval_surface`): when a sensitive action is parked for approval
+//! during a **voice-initiated** turn, it speaks the confirmation prompt aloud so
+//! a hands-free user can answer "yes"/"no" by voice (Phase 4 of #3148).
+
+use once_cell::sync::Lazy;
+use serde::{Deserialize, Serialize};
+use tokio::sync::broadcast;
+
+const LOG_PREFIX: &str = "[voice-speak]";
+
+/// A request for the assistant to speak a line aloud.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct SpeakRequest {
+    /// The text to synthesize and play.
+    pub text: String,
+    /// Originating subsystem, for diagnostics/UI (e.g. `"approval"`).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub source: Option<String>,
+}
+
+impl SpeakRequest {
+    /// Build a speak request with no source label.
+    pub fn new(text: impl Into<String>) -> Self {
+        Self {
+            text: text.into(),
+            source: None,
+        }
+    }
+
+    /// Tag the request with an originating subsystem.
+    pub fn with_source(mut self, source: impl Into<String>) -> Self {
+        self.source = Some(source.into());
+        self
+    }
+}
+
+/// Process-wide sender; the channel buffers up to 32 requests per receiver.
+static SPEAK_BUS: Lazy<broadcast::Sender<SpeakRequest>> = Lazy::new(|| {
+    let (tx, _rx) = broadcast::channel(32);
+    tx
+});
+
+/// Subscribe to speak requests. Used by the Socket.IO bridge.
+pub fn subscribe_speak_events() -> broadcast::Receiver<SpeakRequest> {
+    SPEAK_BUS.subscribe()
+}
+
+/// Publish a request for the assistant to speak `request.text`.
+///
+/// Fire-and-forget: if nobody is subscribed (bridge not started, UI offline) the
+/// request is dropped. Empty/whitespace text is a no-op — never synthesize
+/// silence. Returns the number of subscribers that received it, for diagnostics.
+pub fn publish_speak(request: SpeakRequest) -> usize {
+    if request.text.trim().is_empty() {
+        log::debug!("{LOG_PREFIX} ignoring empty speak request");
+        return 0;
+    }
+    log::debug!(
+        "{LOG_PREFIX} publish speak source={:?} chars={}",
+        request.source,
+        // `len()` is a byte length and over-reports non-ASCII text; count
+        // characters so the value matches the `chars=` label.
+        request.text.chars().count()
+    );
+    match SPEAK_BUS.send(request) {
+        Ok(n) => n,
+        Err(_) => {
+            log::debug!("{LOG_PREFIX} no speak subscribers — request dropped");
+            0
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn publish_is_received_by_subscriber() {
+        let mut rx = subscribe_speak_events();
+        let delivered = publish_speak(SpeakRequest::new("hello there").with_source("test"));
+        assert!(delivered >= 1);
+        // The process-global bus may carry lagged events from parallel tests;
+        // drain until we find ours.
+ let mut found = false; + for _ in 0..16 { + match rx.try_recv() { + Ok(req) if req.text == "hello there" => { + assert_eq!(req.source.as_deref(), Some("test")); + found = true; + break; + } + Ok(_) => continue, + Err(broadcast::error::TryRecvError::Lagged(_)) => continue, + Err(_) => break, + } + } + assert!(found, "expected our speak request from the broadcast bus"); + } + + #[test] + fn empty_text_is_not_published() { + let _rx = subscribe_speak_events(); + assert_eq!(publish_speak(SpeakRequest::new(" ")), 0); + } + + #[test] + fn builder_sets_source() { + let req = SpeakRequest::new("hi").with_source("approval"); + assert_eq!(req.source.as_deref(), Some("approval")); + assert_eq!(SpeakRequest::new("hi").source, None); + } +} diff --git a/src/openhuman/wallet/execution_tests.rs b/src/openhuman/wallet/execution_tests.rs index 10d3fff80e..5e2e3b7268 100644 --- a/src/openhuman/wallet/execution_tests.rs +++ b/src/openhuman/wallet/execution_tests.rs @@ -479,6 +479,7 @@ fn chat_ctx_from(owner: &QuoteOwner) -> crate::openhuman::approval::ApprovalChat crate::openhuman::approval::ApprovalChatContext { thread_id: owner.thread_id.clone(), client_id: owner.client_id.clone(), + voice: false, } } diff --git a/tests/channels_large_round25_raw_coverage_e2e.rs b/tests/channels_large_round25_raw_coverage_e2e.rs index 4ffdf20978..fbcf78958e 100644 --- a/tests/channels_large_round25_raw_coverage_e2e.rs +++ b/tests/channels_large_round25_raw_coverage_e2e.rs @@ -114,7 +114,7 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { ); assert!( - web::start_chat(" ", "thread", "hello", None, None, None, None, None) + web::start_chat(" ", "thread", "hello", None, None, None, None, None, false) .await .unwrap_err() .contains("client_id is required") @@ -138,6 +138,7 @@ async fn web_channel_validation_cancellation_and_error_events_are_observable() { None, Some("en-US".to_string()), None, + false, ) .await .expect("start forced-error chat"); diff --git 
a/tests/channels_provider_deep_raw_coverage_e2e.rs b/tests/channels_provider_deep_raw_coverage_e2e.rs index 5e0af50096..4585a67f0a 100644 --- a/tests/channels_provider_deep_raw_coverage_e2e.rs +++ b/tests/channels_provider_deep_raw_coverage_e2e.rs @@ -230,19 +230,19 @@ async fn dispatch_harness_covers_error_context_compaction_and_timeout_paths() { #[tokio::test] async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exercised() { assert!( - start_chat("", "thread", "hello", None, None, None, None, None) + start_chat("", "thread", "hello", None, None, None, None, None, false) .await .expect_err("empty client rejected") .contains("client_id") ); assert!( - start_chat("client", "", "hello", None, None, None, None, None) + start_chat("client", "", "hello", None, None, None, None, None, false) .await .expect_err("empty thread rejected") .contains("thread_id") ); assert!( - start_chat("client", "thread", " ", None, None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, None, false) .await .expect_err("empty message rejected") .contains("message") @@ -266,6 +266,7 @@ async fn web_channel_validation_cancel_and_classifier_snapshots_are_publicly_exe None, None, None, + false, ) .await; assert!(blocked.is_err()); diff --git a/tests/channels_provider_leftovers_raw_coverage_e2e.rs b/tests/channels_provider_leftovers_raw_coverage_e2e.rs index 6220faf012..b1511c65ee 100644 --- a/tests/channels_provider_leftovers_raw_coverage_e2e.rs +++ b/tests/channels_provider_leftovers_raw_coverage_e2e.rs @@ -347,6 +347,7 @@ async fn web_round19_covers_classifier_variants_and_cancel_cleanup() { None, None, None, + false, ) .await .expect("start forced web chat"); diff --git a/tests/channels_runtime_raw_coverage_e2e.rs b/tests/channels_runtime_raw_coverage_e2e.rs index e02d19e9aa..792010c6f5 100644 --- a/tests/channels_runtime_raw_coverage_e2e.rs +++ b/tests/channels_runtime_raw_coverage_e2e.rs @@ -373,19 +373,19 @@ async fn 
yuanbao_public_channel_and_config_paths_are_isolated_from_network() { #[tokio::test] async fn web_channel_validation_cancel_and_event_subscription_are_fast() { assert!( - start_chat("", "thread", "hello", None, None, None, None, None) + start_chat("", "thread", "hello", None, None, None, None, None, false) .await .expect_err("empty client rejected") .contains("client_id") ); assert!( - start_chat("client", "", "hello", None, None, None, None, None) + start_chat("client", "", "hello", None, None, None, None, None, false) .await .expect_err("empty thread rejected") .contains("thread_id") ); assert!( - start_chat("client", "thread", " ", None, None, None, None, None) + start_chat("client", "thread", " ", None, None, None, None, None, false) .await .expect_err("empty message rejected") .contains("message") @@ -409,6 +409,7 @@ async fn web_channel_validation_cancel_and_event_subscription_are_fast() { None, None, None, + false, ) .await; assert!( diff --git a/tests/channels_web_startup_raw_coverage_e2e.rs b/tests/channels_web_startup_raw_coverage_e2e.rs index d4e1849bf0..02c653e8c8 100644 --- a/tests/channels_web_startup_raw_coverage_e2e.rs +++ b/tests/channels_web_startup_raw_coverage_e2e.rs @@ -113,9 +113,11 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { assert_eq!(all_web_channel_registered_controllers().len(), 4); assert_eq!(schemas("missing").function, "unknown"); - let err = channel_web_chat("client", "thread", " ", None, None, None, None, None) - .await - .expect_err("blank messages are rejected"); + let err = channel_web_chat( + "client", "thread", " ", None, None, None, None, None, false, + ) + .await + .expect_err("blank messages are rejected"); assert!(err.contains("message is required")); let cancel = channel_web_cancel("client", "missing-thread") @@ -140,6 +142,7 @@ async fn web_controllers_validate_inputs_and_emit_structured_forced_errors() { None, Some("zh-CN".to_string()), None, + false, ) .await .expect("chat request 
accepted") @@ -185,6 +188,7 @@ async fn web_chat_cancel_aborts_in_flight_thread_without_real_provider() { None, None, None, + false, ) .await .expect("start chat"); diff --git a/tests/channels_web_telegram_raw_coverage_e2e.rs b/tests/channels_web_telegram_raw_coverage_e2e.rs index 30ccf3709c..d0653f919a 100644 --- a/tests/channels_web_telegram_raw_coverage_e2e.rs +++ b/tests/channels_web_telegram_raw_coverage_e2e.rs @@ -254,6 +254,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio args_redacted: json!({ "path": "target/channels-web-telegram-round18-artifact" }), thread_id: Some("round18-thread".to_string()), client_id: Some("round18-client".to_string()), + is_voice: false, }); let approval = timeout(Duration::from_secs(5), async { @@ -285,6 +286,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio args_redacted: json!({}), thread_id: None, client_id: Some("round18-client".to_string()), + is_voice: false, }); web_test_support::set_forced_run_chat_task_error_for_test(Some( @@ -300,6 +302,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio Some("missing-profile".to_string()), Some("en-US".to_string()), None, + false, ) .await .expect("forced chat accepted"); @@ -325,6 +328,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, + false, ) .await .expect("first chat accepted"); @@ -337,6 +341,7 @@ async fn web_channel_approval_bridge_forced_errors_and_newer_request_cancellatio None, None, None, + false, ) .await .expect("second chat accepted"); diff --git a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs index 377ffd1efb..5ae037207c 100644 --- a/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs +++ b/tests/channels_web_yuanbao_round22_raw_coverage_e2e.rs @@ -189,13 +189,13 @@ fn isolated_config() -> (tempfile::TempDir, Config) { #[tokio::test] async fn 
web_start_chat_validation_forced_error_and_cancel_paths_are_structured() { assert_eq!( - start_chat(" ", "thread", "hello", None, None, None, None, None) + start_chat(" ", "thread", "hello", None, None, None, None, None, false) .await .unwrap_err(), "client_id is required" ); assert_eq!( - start_chat("client", " ", "hello", None, None, None, None, None) + start_chat("client", " ", "hello", None, None, None, None, None, false) .await .unwrap_err(), "thread_id is required" @@ -215,6 +215,7 @@ async fn web_start_chat_validation_forced_error_and_cancel_paths_are_structured( None, None, None, + false, ) .await .expect("accepted"); diff --git a/tests/tool_registry_approval_raw_coverage_e2e.rs b/tests/tool_registry_approval_raw_coverage_e2e.rs index 5d43709914..324b817985 100644 --- a/tests/tool_registry_approval_raw_coverage_e2e.rs +++ b/tests/tool_registry_approval_raw_coverage_e2e.rs @@ -1189,6 +1189,7 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { ApprovalChatContext { thread_id: "approval-raw-thread".to_string(), client_id: "approval-raw-client".to_string(), + voice: false, }, async move { gate_for_task @@ -1403,6 +1404,7 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { ApprovalChatContext { thread_id: "approval-live-policy-thread".to_string(), client_id: "approval-live-policy-client".to_string(), + voice: false, }, gate.intercept_audited( "tools.live_policy_allowed", @@ -1428,6 +1430,7 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { ApprovalChatContext { thread_id: "approval-deny-thread".to_string(), client_id: "approval-deny-client".to_string(), + voice: false, }, async move { gate_for_deny_task @@ -1546,6 +1549,7 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { ApprovalChatContext { thread_id: "approval-persist-failure-thread".to_string(), client_id: "approval-persist-failure-client".to_string(), + voice: false, }, 
gate.intercept_audited( "tools.persistence_failure", diff --git a/tests/tools_approval_channels_raw_coverage_e2e.rs b/tests/tools_approval_channels_raw_coverage_e2e.rs index f7dc8311cf..fcd7d5b47b 100644 --- a/tests/tools_approval_channels_raw_coverage_e2e.rs +++ b/tests/tools_approval_channels_raw_coverage_e2e.rs @@ -2073,7 +2073,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "", "thread-1", "hello", None, None, None, None, None, + "", "thread-1", "hello", None, None, None, None, None, false, ) .await .expect_err("blank client_id"), @@ -2081,7 +2081,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "", "hello", None, None, None, None, None, + "client-1", "", "hello", None, None, None, None, None, false, ) .await .expect_err("blank thread_id"), @@ -2089,7 +2089,7 @@ async fn web_channel_public_paths_cover_event_delivery_and_validation_errors() { ); assert_eq!( openhuman_core::openhuman::channels::web::start_chat( - "client-1", "thread-1", " ", None, None, None, None, None, + "client-1", "thread-1", " ", None, None, None, None, None, false, ) .await .expect_err("blank message"), diff --git a/tests/tools_network_channels_raw_coverage_e2e.rs b/tests/tools_network_channels_raw_coverage_e2e.rs index 26f759fa7e..938e0401cc 100644 --- a/tests/tools_network_channels_raw_coverage_e2e.rs +++ b/tests/tools_network_channels_raw_coverage_e2e.rs @@ -556,7 +556,7 @@ async fn web_channel_public_paths_cover_validation_cancel_schema_and_event_bus() assert_eq!(web_channel_schema("cancel").function, "web_cancel"); assert_eq!(web_channel_schema("missing").function, "unknown"); - let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, None) + let missing_client = start_chat(" ", "thread", "hello", None, None, None, None, None, false) .await 
.expect_err("blank client"); assert_contains(&missing_client, "client_id is required"); diff --git a/tests/worker_b_raw_coverage_e2e.rs b/tests/worker_b_raw_coverage_e2e.rs index 0afaba629d..6dbaa14f87 100644 --- a/tests/worker_b_raw_coverage_e2e.rs +++ b/tests/worker_b_raw_coverage_e2e.rs @@ -597,6 +597,7 @@ async fn approval_gate_rpc_decision_resumes_parked_tool_and_records_execution() ApprovalChatContext { thread_id: "worker-b-thread".to_string(), client_id: "worker-b-client".to_string(), + voice: false, }, async move { gate_for_task