diff --git a/docs/memory-budget.md b/docs/memory-budget.md index 01204f68..2212d8e1 100644 --- a/docs/memory-budget.md +++ b/docs/memory-budget.md @@ -22,7 +22,7 @@ explicit, enable capacity planning, and provide a stable contract that CI can ve | `previousSnapshots` (allocation baselines) | [`src/tools/flutter-memory-profile.ts:63`](../src/tools/flutter-memory-profile.ts#L63) | LRU, `MAX_DEVICES` (16) entries | 128 KB / device | | `proxies` (network proxy state) | [`src/tools/flutter-network.ts:42`](../src/tools/flutter-network.ts#L42) | FIFO, `MAX_ENTRIES` (1000) entries per device; removed on `handleStop()` | 1 MB / device | | `trackers` (rebuild tracking) | [`src/tools/flutter-track-rebuilds.ts:51`](../src/tools/flutter-track-rebuilds.ts#L51) | `MAX_EVENTS_PER_TRACKER` (10,000) events; removed on stop | 2 MB / device | -| `flutterClientCache` | [`src/tools/native-input-backend.ts:670`](../src/tools/native-input-backend.ts#L670) | Per bundleId+deviceId; negative entries expire after `NEGATIVE_CACHE_TTL_MS` (30 s) | 64 KB / entry | +| `flutterClientCache` | [`src/input/flutter-resolver.ts:32`](../src/input/flutter-resolver.ts#L32) | Per bundleId+deviceId; negative entries expire after `NEGATIVE_CACHE_TTL_MS` (30 s) | 64 KB / entry | | `pools` (tab manager) | [`src/tools/tab-manager.ts:25`](../src/tools/tab-manager.ts#L25) | Per device; removed on `disposeDevice()` | 256 KB / pool | | `peakRssBytes` / `sampleCount` (memory tracker) | [`src/metrics/memory-tracker.ts:55`](../src/metrics/memory-tracker.ts#L55) | Process lifetime (scalar integers) | < 1 KB | diff --git a/src/input/applescript-backend.ts b/src/input/applescript-backend.ts new file mode 100644 index 00000000..4c8378a4 --- /dev/null +++ b/src/input/applescript-backend.ts @@ -0,0 +1,349 @@ +/** + * AppleScriptInputBackend — uses AppleScript (`osascript`) and Swift CGEvent. 
+ * + * This backend is **default-deny**: it is only instantiated when the caller + * explicitly opts in via `OPENSAFARI_ALLOW_FOCUS_INPUT=1`. Without the opt-in, + * `getInputBackend()` throws `HeadlessInputUnavailableError` instead, preventing + * the surprising focus-theft / mouse-movement behavior that motivated issues + * #403 and #405. + * + * Works on any Xcode version as it bypasses `simctl io input` entirely. + * + * Requires: + * - Accessibility permissions for System Events + * - Simulator app running and visible + * + * Coordinate translation assumes Simulator is at default "Point Accurate" (1:1) zoom. + * + * Split from `src/tools/native-input-backend.ts` as part of the #707 (a) + * refactor. Behavior is strictly unchanged. + */ + +import { execFile } from 'child_process'; +import { promisify } from 'util'; +import { timedInput } from '../metrics/input-telemetry'; +import type { InputBackend } from './backend'; + +const execFileAsync = promisify(execFile); + +function delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** + * AppleScript key-code mapping (macOS virtual key codes). + * Used to translate HID key codes and key names to AppleScript `key code` values. + */ +export const HID_TO_APPLESCRIPT: Record = { + '40': 36, // Return / Enter + '41': 53, // Escape + '42': 51, // Backspace / Delete + '43': 48, // Tab + '44': 49, // Space + '74': 115, // Home + '79': 124, // Right arrow + '80': 123, // Left arrow + '81': 125, // Down arrow + '82': 126, // Up arrow +}; + +export const SENDKEY_TO_APPLESCRIPT: Record = { + Return: 36, + Escape: 53, + Tab: 48, + Space: 49, + Delete: 51, + Home: 115, +}; + +/** + * Uses AppleScript (`osascript`) and Swift CGEvent for input. + * Works on any Xcode version as it bypasses `simctl io input` entirely. + */ +export class AppleScriptInputBackend implements InputBackend { + readonly kind = 'applescript' as const; + + /** + * Per-device cache for the resolved content origin. 
+ * Key: deviceId, Value: { x, y, winX, winY } where winX/winY is the window + * top-left at the time of the last measurement (used to detect window moves). + */ + private originCache = new Map(); + + /** Set of deviceIds that have already emitted the AX fallback warning. */ + private warnedDevices = new Set(); + + /** + * Timestamp of the last successful Simulator activation (ms since epoch). + * Retained for observability and potential future diagnostics; no longer + * used to gate the frontmost-app check — every `activateSimulator()` call + * queries System Events to confirm current focus state before deciding + * whether to activate. + * + * This field is scoped to the AppleScript backend instance and does NOT + * affect any headless tier. It is NOT shared with `getInputBackend()`. + */ + private static readonly ACTIVATION_CACHE_TTL_MS = 500; + private lastActivationAt = 0; + + private async runAppleScript(lines: string[]): Promise { + const args = lines.flatMap((line) => ['-e', line]); + const { stdout } = await execFileAsync('osascript', args, { timeout: 10_000 }); + return stdout.trim(); + } + + /** + * Activate Simulator.app via AppleScript when it is not already frontmost. + * + * On every call we query System Events for the current frontmost process + * name. If Simulator is already frontmost we skip the `activate` call and + * the 150 ms settle delay — the frontmost check is a single cheap osascript + * IPC round-trip (~5–10 ms) and is always correct regardless of how recently + * the last activation occurred. + * + * `lastActivationAt` is retained for observability / future diagnostics but + * no longer gates the frontmost check — removing the TTL early-return + * ensures input is never delivered to the wrong app when focus changes + * between consecutive calls in a burst. + * + * This optimisation applies ONLY to the opt-in focus-stealing path — + * all headless backends skip this method entirely. 
+ */ + private async activateSimulator(): Promise { + // Always check frontmost state; the IPC cost (~5–10 ms) is cheaper than + // the risk of delivering input to the wrong app after a focus change. + const frontApp = await this.runAppleScript([ + 'tell application "System Events" to set frontApp to name of first application process whose frontmost is true', + 'return frontApp', + ]); + if (frontApp !== 'Simulator') { + await this.runAppleScript(['tell application "Simulator" to activate']); + await delay(150); + } + this.lastActivationAt = Date.now(); + } + + /** + * Get the Simulator window's content-area origin in macOS screen coordinates + * by querying the position of the first child UI element (the iOS device + * content area within the macOS window). This avoids hardcoding any title-bar + * height offset and handles Xcode 26 where the AX bridge already returns + * frames in window-relative coordinates. + * + * On any AppleScript failure, falls back to the raw window position (offset 0) + * and emits one `console.error` warning per device. The result is cached per + * deviceId; pass `{ refresh: true }` to invalidate the cache. 
+ */ + async getSimulatorContentOrigin( + deviceId: string, + options?: { refresh?: boolean }, + ): Promise<{ x: number; y: number }> { + if (!options?.refresh) { + const cached = this.originCache.get(deviceId); + if (cached) { + return { x: cached.x, y: cached.y }; + } + } + + let winX = 0; + let winY = 0; + let contentX = 0; + let contentY = 0; + + try { + const result = await this.runAppleScript([ + 'tell application "System Events"', + ' tell process "Simulator"', + ' set winPos to position of window 1', + ' set wx to item 1 of winPos', + ' set wy to item 2 of winPos', + ' set childPos to position of UI element 1 of window 1', + ' set cx to item 1 of childPos', + ' set cy to item 2 of childPos', + ' return (wx as text) & "," & (wy as text) & "|" & (cx as text) & "," & (cy as text)', + ' end tell', + 'end tell', + ]); + + const [winPart, childPart] = result.split('|'); + if (!winPart || !childPart) { + throw new Error(`Unexpected AX output: ${result}`); + } + const [px, py] = winPart.split(',').map(Number); + const [cx, cy] = childPart.split(',').map(Number); + if ([px, py, cx, cy].some((n) => !isFinite(n))) { + throw new Error(`Non-numeric values in AX output: ${result}`); + } + winX = px; + winY = py; + contentX = cx; + contentY = cy; + } catch (err) { + // Fallback: use raw window position (zero title-bar offset). + // Only warn once per device to avoid log spam. + if (!this.warnedDevices.has(deviceId)) { + this.warnedDevices.add(deviceId); + console.error( + `[input-backend] AppleScript AX content-origin query failed for device ${deviceId}; ` + + `falling back to window position (offset 0). ` + + `Error: ${err instanceof Error ? err.message : String(err)}`, + ); + } + + // Attempt a simpler query to get the window position for the fallback. 
+ try { + const winResult = await this.runAppleScript([ + 'tell application "System Events"', + ' tell process "Simulator"', + ' set winPos to position of window 1', + ' set wx to item 1 of winPos', + ' set wy to item 2 of winPos', + ' return (wx as text) & "," & (wy as text)', + ' end tell', + 'end tell', + ]); + const [fx, fy] = winResult.split(',').map(Number); + if (isFinite(fx) && isFinite(fy)) { + winX = fx; + winY = fy; + } + } catch { + // If even the fallback fails, use 0,0. + } + contentX = winX; + contentY = winY; + } + + this.originCache.set(deviceId, { x: contentX, y: contentY, winX, winY }); + return { x: contentX, y: contentY }; + } + + /** + * Translate iOS point coordinates to absolute macOS screen coordinates. + * Assumes 1:1 point mapping (Simulator at default zoom). + */ + private async toScreen( + deviceId: string, + x: number, + y: number, + ): Promise<{ sx: number; sy: number }> { + const origin = await this.getSimulatorContentOrigin(deviceId); + return { + sx: Math.round(origin.x + x), + sy: Math.round(origin.y + y), + }; + } + + async tap(deviceId: string, x: number, y: number, duration?: number): Promise { + await timedInput(this.kind, 'tap', deviceId, async () => { + await this.activateSimulator(); + const { sx, sy } = await this.toScreen(deviceId, x, y); + + if (duration && duration > 0) { + // Long press: mouse down → wait → mouse up via Swift CGEvent + await execFileAsync('swift', ['-e', [ + 'import Cocoa', + `let p = CGPoint(x: ${sx}, y: ${sy})`, + 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', + `Thread.sleep(forTimeInterval: ${duration})`, + 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', + ].join('\n')], { timeout: Math.max(15_000, duration * 1000 + 5000) }); + } else { + await this.runAppleScript([ + `tell application "System Events" to click at {${sx}, ${sy}}`, + 
]); + } + }); + } + + async swipe( + deviceId: string, + startX: number, startY: number, + endX: number, endY: number, + duration?: number, + ): Promise { + await timedInput(this.kind, 'swipe', deviceId, async () => { + await this.activateSimulator(); + // Get origin once for both start and end coordinates + const origin = await this.getSimulatorContentOrigin(deviceId); + const sx = Math.round(origin.x + startX); + const sy = Math.round(origin.y + startY); + const ex = Math.round(origin.x + endX); + const ey = Math.round(origin.y + endY); + const dur = duration ?? 0.5; + const steps = 20; + const stepDelay = dur / steps; + + // Mouse drag via Swift CGEvent (macOS built-in, no external deps) + await execFileAsync('swift', ['-e', [ + 'import Cocoa', + `let x1: CGFloat = ${sx}, y1: CGFloat = ${sy}`, + `let x2: CGFloat = ${ex}, y2: CGFloat = ${ey}`, + `let steps = ${steps}`, + `let stepDelay = ${stepDelay}`, + 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: CGPoint(x: x1, y: y1), mouseButton: .left)!.post(tap: .cghidEventTap)', + 'Thread.sleep(forTimeInterval: 0.05)', + 'for i in 1...steps {', + ' let t = CGFloat(i) / CGFloat(steps)', + ' let p = CGPoint(x: x1 + (x2 - x1) * t, y: y1 + (y2 - y1) * t)', + ' CGEvent(mouseEventSource: nil, mouseType: .leftMouseDragged, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', + ' Thread.sleep(forTimeInterval: stepDelay)', + '}', + 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: CGPoint(x: x2, y: y2), mouseButton: .left)!.post(tap: .cghidEventTap)', + ].join('\n')], { timeout: 15_000 }); + }); + } + + async typeText(deviceId: string, text: string, _delayMs?: number): Promise { + await timedInput(this.kind, 'typeText', deviceId, async () => { + await this.activateSimulator(); + // Escape special AppleScript characters + const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + await this.runAppleScript([ + `tell application "System Events" 
to keystroke "${escaped}"`, + ]); + }); + } + + async keypress(deviceId: string, keyCode: string): Promise { + await timedInput(this.kind, 'keypress', deviceId, async () => { + await this.activateSimulator(); + const asKeyCode = HID_TO_APPLESCRIPT[keyCode]; + if (asKeyCode === undefined) { + throw new Error( + `Unknown HID key code "${keyCode}" for AppleScript backend. ` + + `Supported: ${Object.keys(HID_TO_APPLESCRIPT).join(', ')}`, + ); + } + await this.runAppleScript([ + `tell application "System Events" to key code ${asKeyCode}`, + ]); + }); + } + + async sendKey(deviceId: string, keyName: string): Promise { + await timedInput(this.kind, 'sendKey', deviceId, async () => { + await this.activateSimulator(); + const asKeyCode = SENDKEY_TO_APPLESCRIPT[keyName]; + if (asKeyCode === undefined) { + throw new Error( + `Unknown key name "${keyName}" for AppleScript backend. ` + + `Supported: ${Object.keys(SENDKEY_TO_APPLESCRIPT).join(', ')}`, + ); + } + await this.runAppleScript([ + `tell application "System Events" to key code ${asKeyCode}`, + ]); + }); + } + + /** + * Batching is not supported on AppleScriptInputBackend. This is the + * opt-in focus-stealing path; each tap must activate Simulator.app first, + * so there is no meaningful process-spawn reduction available. Callers + * that need repeated taps via this backend must invoke `tap()` in a loop. + */ + supportsBatching(): boolean { + return false; + } +} diff --git a/src/input/backend-resolver.ts b/src/input/backend-resolver.ts new file mode 100644 index 00000000..57ef5d68 --- /dev/null +++ b/src/input/backend-resolver.ts @@ -0,0 +1,337 @@ +/** + * InputBackendResolver — encapsulates backend detection, caching, and the + * 4-tier fallback strategy with default-deny hardening for the focus-stealing + * path. + * + * Extracted from `src/tools/native-input-backend.ts` as part of the #707 (a) + * refactor. Resolution policy and fallback order are strictly unchanged. 
+ * + * Tier order (highest priority first): + * 0. FlutterVMInputBackend — Flutter VM Service (debug/profile builds only) + * 1. PointerService — opt-in experimental (#590 Phase 1) + * 1. SimulatorKitHID — headless, any app, all Xcode versions + * 2. SimctlInputBackend — `simctl io input` (Xcode ≤ 16) + * 2. WebKitInputBackend — JS touch events via WebKit (Safari only) + * 3. AppleScriptInputBackend — CGEvent / focus-stealing, DEFAULT-DENY + */ + +import { SimctlExecutor } from '../simulator/simctl'; +import type { BrowserBackend } from '../types/browser-backend'; +import { FlutterVMInputBackend } from './flutter-vm-backend'; +import { tryCreateSimulatorKitHIDBackend } from './sim-hid-backend'; +import { + isPointerServiceEnabled, + tryCreatePointerServiceBackend, +} from './pointer-service-backend'; +import type { InputBackend } from './backend'; +import { SimctlInputBackend } from './simctl-backend'; +import { AppleScriptInputBackend } from './applescript-backend'; +import { WebKitInputBackend } from './webkit-backend'; +import { FlutterVMResolverInstance } from './flutter-resolver'; +import type { FlutterVMResolver } from './flutter-resolver'; + +// ── Error class ─────────────────────────────────────────────────────────────── + +/** + * Environment variable that opts in to the focus-stealing AppleScript / CGEvent + * input backend. When unset (the default), `getInputBackend()` refuses to + * instantiate `AppleScriptInputBackend` and throws `HeadlessInputUnavailableError` + * instead, preventing silent focus theft. 
+ */ +export const OPENSAFARI_ALLOW_FOCUS_INPUT_ENV = 'OPENSAFARI_ALLOW_FOCUS_INPUT'; +export const OPENSAFARI_HEADLESS_ONLY_ENV = 'OPENSAFARI_HEADLESS_ONLY'; + +function isFocusInputAllowed(): boolean { + const value = process.env[OPENSAFARI_ALLOW_FOCUS_INPUT_ENV]; + return value === '1' || value === 'true'; +} + +function isHeadlessOnly(): boolean { + const value = process.env[OPENSAFARI_HEADLESS_ONLY_ENV]; + return value === '1' || value === 'true'; +} + +/** + * Thrown by `getInputBackend()` when no headless input method is available and + * the caller has not opted in to the focus-stealing fallback. + */ +export class HeadlessInputUnavailableError extends Error { + readonly name = 'HeadlessInputUnavailableError' as const; + readonly deviceId: string; + readonly reason: + | 'no-simctl' + | 'no-webkit' + | 'webkit-disconnected' + | 'headless-only'; + readonly remediation: readonly string[]; + + constructor( + deviceId: string, + reason: HeadlessInputUnavailableError['reason'], + ) { + const remediation = + reason === 'headless-only' + ? 
([ + `${OPENSAFARI_HEADLESS_ONLY_ENV}=1 is set — AppleScript/CGEvent fallback is blocked.`, + 'Ensure a headless backend (simctl, webkit, flutter-vm, simhid) is available.', + `To allow focus-stealing input, unset ${OPENSAFARI_HEADLESS_ONLY_ENV}.`, + ] as const) + : ([ + "Safari QA: call `set_active_context({ context: 'safari' })` to enable WebKitInputBackend", + `Native apps: opt in to the CGEvent fallback by setting ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV}=1 ` + + '(WARNING: will move the mouse cursor and bring Simulator.app to the foreground)', + ] as const); + const message = + `No headless input backend available for device ${deviceId} (reason: ${reason}).\n` + + remediation.map((line) => ` - ${line}`).join('\n'); + super(message); + this.deviceId = deviceId; + this.reason = reason; + this.remediation = remediation; + // Preserve prototype chain across the TypeScript down-compile + Object.setPrototypeOf(this, HeadlessInputUnavailableError.prototype); + } +} + +// ── Probe helpers ───────────────────────────────────────────────────────────── + +/** + * Probe whether the legacy `simctl io input` subcommand is available. + * + * Uses `xcrun simctl help io` which does NOT require a booted device — so a + * transient device fault cannot poison the cached `simctlAvailable` flag for + * the lifetime of the resolver. On Xcode 26+ the `input` subcommand was + * removed and the help listing no longer mentions it. + */ +async function probeSimctlInput(): Promise { + const simctl = new SimctlExecutor(); + try { + const helpOutput = await simctl.exec(['help', 'io'], { timeout: 5000 }); + // Definitive negative: help text was emitted and does NOT list `input` + // — that's exactly the Xcode 26+ shape, so reject. + if (helpOutput.length > 0 && !/\binput\b/.test(helpOutput)) { + return false; + } + // Empty output or `input` mentioned → assume available; the actual + // tap/typeText calls will surface their own errors if simctl misbehaves. 
+ return true; + } catch { + // simctl missing entirely (no Xcode CLT) — treat as unavailable. + return false; + } +} + +/** + * Attempt a single WebKit reconnect for a client that exists but reports + * `isConnected() === false`. Returns true if the client is usable after the + * attempt. Never throws — transient failures fall through to Tier 3. + */ +async function tryReconnectWebKit(client: BrowserBackend): Promise { + try { + await client.connect(); + return client.isConnected(); + } catch (err) { + console.error( + `[input-backend] WebKit reconnect attempt failed: ${err instanceof Error ? err.message : String(err)}`, + ); + return false; + } +} + +// ── InputBackendResolver ────────────────────────────────────────────────────── + +/** + * Owns all backend detection state (simctl probe result, backend singletons, + * Flutter VM cache). Create one instance per test or call `reset()` to clear + * all cached state between runs. + * + * A module-level singleton is exported as `defaultResolver` for production use. + */ +export class InputBackendResolver { + private simctlAvailable: boolean | null = null; + private detectionPromise: Promise | null = null; + private cachedSimctlBackend: SimctlInputBackend | null = null; + private cachedAppleScriptBackend: AppleScriptInputBackend | null = null; + private focusInputOptInWarned = false; + + // SimulatorKit HID backend cache (Tier 1) + private simHidProbed = false; + private cachedSimHidBackend: InputBackend | null = null; + + // PointerService backend cache (opt-in, Phase 1 of #590) + private pointerServiceProbed = false; + private cachedPointerServiceBackend: InputBackend | null = null; + + private flutterResolver = new FlutterVMResolverInstance(); + + /** + * Get the input backend using a 4-tier fallback strategy with default-deny + * hardening for the focus-stealing path. 
+ */ + async getInputBackend( + deviceId: string, + webkitClient?: BrowserBackend | null, + ): Promise { + // Tier 0: Flutter VM Service (headless, no focus stealing, no opt-in). + const flutterClient = await this.flutterResolver.resolve(deviceId); + if (flutterClient) { + return new FlutterVMInputBackend(flutterClient); + } + + // Tier 1 (opt-in): PointerService backend — Phase 1 of #590. + if (isPointerServiceEnabled()) { + if (!this.pointerServiceProbed) { + this.pointerServiceProbed = true; + try { + this.cachedPointerServiceBackend = await tryCreatePointerServiceBackend(); + } catch { + this.cachedPointerServiceBackend = null; + } + } + if (this.cachedPointerServiceBackend) { + return this.cachedPointerServiceBackend; + } + } + + // Tier 1: SimulatorKit HID (headless, works with any app — all Xcode versions) + if (!this.simHidProbed) { + this.simHidProbed = true; + try { + this.cachedSimHidBackend = await tryCreateSimulatorKitHIDBackend(); + } catch { + this.cachedSimHidBackend = null; + } + } + if (this.cachedSimHidBackend) { + return this.cachedSimHidBackend; + } + + // Probe simctl only when the Tier 2 simctl branch is actually reached. + // The probe is device-independent (`simctl help io`) so a transient device + // fault cannot poison the cached `simctlAvailable` flag for the lifetime of + // the resolver, and Tier 0/Tier 1 successes do not pay this extra cost. + if (this.simctlAvailable === null) { + if (!this.detectionPromise) { + this.detectionPromise = probeSimctlInput().then((available) => { + this.simctlAvailable = available; + return available; + }); + } + await this.detectionPromise; + } + + // Tier 2: simctl io input (headless, works with any app — Xcode ≤16) + if (this.simctlAvailable) { + if (!this.cachedSimctlBackend) { + this.cachedSimctlBackend = new SimctlInputBackend(); + } + return this.cachedSimctlBackend; + } + + // Tier 2: WebKit JS touch injection (headless, Safari web content only). 
+ // If the client is present but disconnected, try a one-shot reconnect so + // transient drops (proxy restart, tab churn) do not flip us to Tier 3. + if (webkitClient) { + if (webkitClient.isConnected()) { + return new WebKitInputBackend(webkitClient); + } + const reconnected = await tryReconnectWebKit(webkitClient); + if (reconnected) { + return new WebKitInputBackend(webkitClient); + } + } + + // HEADLESS_ONLY safety net — block AppleScript fallback even if opt-in is set. + if (isHeadlessOnly()) { + if (isFocusInputAllowed()) { + console.error( + `[input-backend] ${OPENSAFARI_HEADLESS_ONLY_ENV}=1 overrides ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV} — AppleScript backend disabled`, + ); + } + const reason: HeadlessInputUnavailableError['reason'] = 'headless-only'; + const err = new HeadlessInputUnavailableError(deviceId, reason); + console.error(`[input-backend] ${err.message}`); + throw err; + } + + // Tier 3: AppleScript/CGEvent fallback — DEFAULT-DENY. + if (!isFocusInputAllowed()) { + let reason: HeadlessInputUnavailableError['reason']; + if (!webkitClient) { + reason = 'no-webkit'; + } else { + reason = 'webkit-disconnected'; + } + const err = new HeadlessInputUnavailableError(deviceId, reason); + console.error(`[input-backend] ${err.message}`); + throw err; + } + + if (!this.focusInputOptInWarned) { + console.error( + `[input-backend] ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV}=1 is set — ` + + 'AppleScript/CGEvent backend is enabled. ' + + 'This will move the physical mouse cursor and activate Simulator.app.', + ); + this.focusInputOptInWarned = true; + } + + if (!this.cachedAppleScriptBackend) { + this.cachedAppleScriptBackend = new AppleScriptInputBackend(); + } + return this.cachedAppleScriptBackend; + } + + /** + * Clear all cached state. Equivalent to constructing a fresh instance. + * Exported for testing via the module-level `resetInputBackend()` shim. 
+ */ + reset(): void { + this.simctlAvailable = null; + this.detectionPromise = null; + this.cachedSimctlBackend = null; + this.cachedAppleScriptBackend = null; + this.focusInputOptInWarned = false; + this.simHidProbed = false; + this.cachedSimHidBackend = null; + this.pointerServiceProbed = false; + this.cachedPointerServiceBackend = null; + this.flutterResolver.reset(); + } + + /** + * Override the Flutter VM resolver. Pass `null` to restore the default. + * Intended for unit tests only — do not call from production code. + */ + setFlutterVMResolver(resolver: FlutterVMResolver | null): void { + this.flutterResolver.setResolver(resolver); + } + + /** + * Attempt to resolve a FlutterVMClient for this device. Returns null whenever + * the device is not running a Flutter app in debug/profile mode. Never throws. + * + * Exposed so callers (e.g. routing diagnostics) can probe availability + * without spinning up the full backend tier chain. + */ + async tryGetFlutterVMClient(deviceId: string): Promise { + return this.flutterResolver.resolve(deviceId); + } + + /** + * Current number of entries in the Flutter VM discovery cache. + * Exposed for the cache-budget survey (#554). + */ + getFlutterClientCacheSize(): number { + return this.flutterResolver.cacheSize(); + } +} + +// ── Module-level singleton ──────────────────────────────────────────────────── + +/** + * Default singleton resolver used by the compatibility shim in + * `src/tools/native-input-backend.ts`. Production callers go through that + * shim; tests that need isolation should construct their own `InputBackendResolver`. + */ +export const defaultResolver = new InputBackendResolver(); diff --git a/src/input/backend.ts b/src/input/backend.ts new file mode 100644 index 00000000..94d70fc9 --- /dev/null +++ b/src/input/backend.ts @@ -0,0 +1,96 @@ +/** + * Shared interface and type definitions for native input backends. + * + * Split from `src/tools/native-input-backend.ts` as part of the #707 (a) + * refactor. 
All concrete backends and the resolver depend on this module; + * nothing here depends on them (no cycles). + */ + +/** + * Stable identifier for each concrete input backend. Included in tool call + * results so MCP clients and users can audit which path dispatched their + * input — useful when diagnosing focus-theft reports or confirming that a + * call stayed on a headless tier. + */ +export type InputBackendKind = + | 'flutter-vm' + | 'simctl' + | 'webkit' + | 'applescript' + | 'simhid' + | 'ax-press' + | 'pointer-service'; + +/** + * A single tap event used in batch dispatch. Mirrors the signature of + * `InputBackend.tap` but excludes the `deviceId` (supplied once at the + * batch-call level) to avoid repetition in large queues. + */ +export interface BatchTapEvent { + x: number; + y: number; + /** Optional long-press duration in seconds. */ + duration?: number; +} + +export interface InputBackend { + /** Stable identifier used for observability / audit logging. */ + readonly kind: InputBackendKind; + + tap(deviceId: string, x: number, y: number, duration?: number): Promise; + swipe( + deviceId: string, + startX: number, + startY: number, + endX: number, + endY: number, + duration?: number, + ): Promise; + /** + * Type `text` into whatever is currently focused on `deviceId`. + * + * `delayMs` is an optional inter-character pause between consecutive key + * sends, in milliseconds. Only the simhid backend honours it (other + * backends bypass the software keyboard and have no equivalent failure + * mode); they may safely ignore the argument. Required for segmented + * OTP-style fields that drop characters when keys arrive too fast (issue + * #639 Problem 2). Default 0 (no pause). 
+ */ + typeText(deviceId: string, text: string, delayMs?: number): Promise; + keypress(deviceId: string, keyCode: string): Promise; + sendKey(deviceId: string, keyName: string): Promise; + + /** + * Whether this backend supports the `tapBatch()` method for submitting + * multiple tap events in a single logical call. Callers MUST check this + * before calling `tapBatch()` — the method is absent on backends that + * return `false`. + * + * **Unsupported combinations**: `tapBatch` is intentionally NOT available + * on `SimctlInputBackend` (each simctl invocation opens a separate Xcode + * process, so batching at the TS level provides no meaningful reduction), + * `WebKitInputBackend` (JS injection is already in-process with no spawn + * cost), `FlutterVMInputBackend` (same — evaluate over a WebSocket), + * `AppleScriptInputBackend` (opt-in focus-stealing path; batching would + * hide per-tap activation overhead rather than remove it), and + * `PointerServiceInputBackend` (tap-ps subcommand is experimental; + * batching is deferred until Phase 2 of #590). + */ + supportsBatching(): boolean; + + /** + * Submit multiple tap events to `deviceId` sequentially, reducing the + * per-call overhead that a caller would otherwise pay by invoking + * `tap()` in a loop. + * + * Only available when `supportsBatching()` returns `true`. Callers must + * guard with `supportsBatching()` before calling this method; calling + * it on a backend that does not advertise batching support is a + * programming error and will throw. + * + * The events are dispatched in order. If any event fails, the batch + * stops and rejects with that error — already-dispatched events are + * NOT rolled back (HID injection is fire-and-forget at the OS level). 
+ */ + tapBatch?(deviceId: string, events: BatchTapEvent[]): Promise; +} diff --git a/src/input/flutter-resolver.ts b/src/input/flutter-resolver.ts new file mode 100644 index 00000000..29b8d64c --- /dev/null +++ b/src/input/flutter-resolver.ts @@ -0,0 +1,185 @@ +/** + * Flutter VM resolver — discovers a connected FlutterVMClient for a device. + * + * Extracted from `src/tools/native-input-backend.ts` as part of the #707 (a) + * refactor. Resolution policy is strictly unchanged: returns null for native + * iOS apps and devices without a Flutter debug/profile build. + */ + +import type { FlutterVMClient } from '../flutter'; +import { getFlutterVMClient, removeFlutterVMClient } from '../flutter'; + +// ── Cache entry ────────────────────────────────────────────────────────────── + +// Per-device cache of the Flutter VM client connection so subsequent Tier-0 +// lookups reuse an already-established WebSocket instead of re-running +// discovery on every call. +// +// Value semantics: +// - FlutterVMClient: positive hit (Flutter app connected; reuse) +// - null: negative hit (discovery already failed within NEGATIVE_CACHE_TTL_MS; +// skip discovery and let the caller fall through to Tier 1-3) +interface FlutterClientCacheEntry { + client: FlutterVMClient | null; + expiresAt: number; +} + +// Negative cache TTL: after a failed discovery, don't re-probe for this long. +// Native iOS apps, Safari, and any simulator without a Flutter debug build +// would otherwise pay the full discovery cost on every `getInputBackend()` +// call, stalling tools like `app_scroll_native` / `app_tap` well past their +// unit-test timeouts. +const NEGATIVE_CACHE_TTL_MS = 30_000; + +// Upper bound on how long the initial VM-discovery probe is allowed to block. +// If discovery has not produced a connected client within this window, treat +// the device as non-Flutter so native-app code paths aren't penalised. 
const DISCOVERY_TIMEOUT_MS = 1_500;

// Discovery budget used instead of DISCOVERY_TIMEOUT_MS when
// OPENSAFARI_VM_SERVICE_URL is set (an explicit endpoint was supplied).
const EXPLICIT_URL_DISCOVERY_TIMEOUT_MS = 10_000;

// ── Resolver type ─────────────────────────────────────────────────────────────

/**
 * Overridable resolver that returns a connected `FlutterVMClient` for the
 * device, or `null` when no Flutter VM is discoverable (native app, Safari,
 * simulator without Flutter debug build).
 */
export type FlutterVMResolver = (deviceId: string) => Promise<FlutterVMClient | null>;

// ── Default resolver implementation ──────────────────────────────────────────

/**
 * Record a negative hit for `deviceId` that expires NEGATIVE_CACHE_TTL_MS
 * after `now`, and return `null` so callers can `return rememberMiss(...)`.
 */
function rememberMiss(
  cache: Map<string, FlutterClientCacheEntry>,
  deviceId: string,
  now: number,
): null {
  cache.set(deviceId, { client: null, expiresAt: now + NEGATIVE_CACHE_TTL_MS });
  return null;
}

/**
 * Run `client.connect()` raced against a timer. Rejects with
 * `flutter-vm-discovery-timeout` when the timer fires first; the timer is
 * always cleared so it cannot keep the event loop alive.
 */
async function connectWithinBudget(client: FlutterVMClient, deviceId: string): Promise<void> {
  // Read the env var once so the timeout choice and the connect argument
  // cannot disagree.
  const explicitUrl = process.env.OPENSAFARI_VM_SERVICE_URL;
  const budgetMs = explicitUrl ? EXPLICIT_URL_DISCOVERY_TIMEOUT_MS : DISCOVERY_TIMEOUT_MS;

  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(
      () => reject(new Error('flutter-vm-discovery-timeout')),
      budgetMs,
    );
  });
  try {
    await Promise.race([
      client.connect({ deviceId, vmServiceUrl: explicitUrl || undefined }),
      timeout,
    ]);
  } finally {
    if (timeoutId) clearTimeout(timeoutId);
  }
}

/**
 * Default discovery policy backing `FlutterVMResolverInstance`.
 *
 * @param deviceId Device whose Flutter VM should be resolved.
 * @param cache    Per-instance cache of positive / negative results; mutated.
 * @returns A connected client whose evaluate-compile probe succeeded, or
 *          `null` (also cached as a negative hit) in every other case,
 *          including any thrown discovery / connect error.
 */
async function defaultFlutterVMResolver(
  deviceId: string,
  cache: Map<string, FlutterClientCacheEntry>,
): Promise<FlutterVMClient | null> {
  const now = Date.now();
  const cached = cache.get(deviceId);
  if (cached && cached.expiresAt > now) {
    // Fast path: cached negative hit within TTL.
    if (cached.client === null) {
      return null;
    }
    // Fast path: cached positive hit that is still connected.
    if (cached.client.isConnected()) {
      return cached.client;
    }
    // Stale positive entry (client disconnected). Fall through to re-probe.
  }

  try {
    const client = getFlutterVMClient(deviceId);
    if (!client.isConnected()) {
      // Bound the discovery probe so non-Flutter devices don't stall tools
      // that legitimately just want Tier 1-3.
      await connectWithinBudget(client, deviceId);
    }
    if (!client.isConnected()) {
      return rememberMiss(cache, deviceId, now);
    }

    // The VM is reachable, but FlutterVMInputBackend can only drive input
    // through `evaluate`. Probe once up-front so a VM that rejects evaluate
    // (code 113) falls through to the next tier instead of surfacing the raw
    // error to the user.
    const probe = await client.probeEvaluateCompile();
    if (!probe.available) {
      // The client is not reusable after a negative probe; drop it from the
      // client registry instead of leaving it behind on every discovery cycle.
      removeFlutterVMClient(deviceId);
      if (probe.reason === 'compile-error-113') {
        console.error(
          `[input-backend] Flutter VM on ${deviceId} rejects evaluate (code 113). ` +
            'Likely a release build or `simctl launch` without `flutter run` — ' +
            'falling back past Tier 0. Set OPENSAFARI_DISABLE_AX_PRESS=0 to use ' +
            'Tier 1.5 for element-targeted taps.',
        );
      }
      return rememberMiss(cache, deviceId, now);
    }

    // Positive entries never expire by time; they are re-probed only once the
    // client reports itself disconnected (see the stale-entry branch above).
    cache.set(deviceId, { client, expiresAt: Infinity });
    return client;
  } catch {
    // VM discovery / connect failures are expected for non-Flutter apps.
    // Cache the negative result so the next call doesn't pay the probe cost.
    return rememberMiss(cache, deviceId, now);
  }
}

// ── FlutterVMResolverInstance ─────────────────────────────────────────────────

/**
 * Encapsulates the Flutter VM discovery cache and resolver override, owned per
 * `InputBackendResolver` instance so state is never shared across instances.
 */
export class FlutterVMResolverInstance {
  private readonly cache = new Map<string, FlutterClientCacheEntry>();

  /** Default policy bound to this instance's cache. */
  private readonly defaultResolver: FlutterVMResolver = (deviceId) =>
    defaultFlutterVMResolver(deviceId, this.cache);

  private resolver: FlutterVMResolver = this.defaultResolver;

  /**
   * Attempt to resolve a FlutterVMClient for this device. Never throws: any
   * error raised by the active resolver collapses to `null` so the tier
   * fallback keeps working for native iOS apps.
   */
  async resolve(deviceId: string): Promise<FlutterVMClient | null> {
    try {
      return await this.resolver(deviceId);
    } catch {
      return null;
    }
  }

  /**
   * Override the resolver. Pass `null` to restore the default.
   * Intended for unit tests only.
   */
  setResolver(resolver: FlutterVMResolver | null): void {
    this.resolver = resolver ?? this.defaultResolver;
  }

  /** Current number of entries in the cache (positive + negative). */
  cacheSize(): number {
    return this.cache.size;
  }

  /** Clear all cached entries and reset the resolver to default. */
  reset(): void {
    this.cache.clear();
    this.resolver = this.defaultResolver;
  }
}
+ * + * Dispatches pointer/keyboard/text events directly to the Flutter engine via + * the Dart VM Service (`FlutterVMClient.evaluate`). Because the events are + * synthesised inside the running Dart isolate and fed straight into + * `PlatformDispatcher.onPointerDataPacket`, this backend: + * + * - **Does not move the physical mouse cursor** (no CGEvent) + * - **Does not bring Simulator.app to the foreground** (no AppleScript + * activation) + * - **Requires no opt-in env var** — it is truly headless + * + * Compared to the three existing tiers (simctl → webkit → applescript), this + * path is picked first whenever the target device is running a Flutter app in + * debug/profile mode and the VM Service URL can be discovered. Native UIKit + * apps continue to flow through the existing tiers unchanged. + * + * Coordinate system: iOS AX frames are expressed in logical points (the same + * units Flutter calls "logical pixels"). The Dart payload multiplies by the + * implicit view's `devicePixelRatio` to land on the engine's `physicalX/Y` + * expectations. + * + * See issue #481 for the motivation and rollout checklist. + * + * Moved from `src/tools/flutter-vm-input-backend.ts` as part of the #707 (b) + * consolidation. The old path is kept as a re-export shim for compatibility. + */ + +import type { FlutterVMClient } from '../flutter'; +import { FlutterVMError } from '../flutter'; +import type { InputBackend, InputBackendKind } from './backend'; +import { timedInput } from '../metrics/input-telemetry'; + +/** + * Structured error surfaced by FlutterVMInputBackend when the underlying VM + * Service call fails (connection drop, Dart exception, timeout, etc). Carries + * the originating op so observability layers can attribute the failure. + */ +/** + * Structured error codes attached to `FlutterVMInputBackendError`. 
/**
 * Structured error codes attached to `FlutterVMInputBackendError`.
 *
 * The table is deliberately small: callers branch on `VM_NO_EVALUATE`
 * (release-build / no-DDS fallback signal) versus everything else, which is
 * surfaced to the user as a concrete failure.
 */
export type FlutterVMInputBackendErrorCode =
  /** `evaluate` rejected with code 113 — VM cannot compile expressions. */
  | 'VM_NO_EVALUATE'
  /** Dart code ran but raised / returned an @Error. */
  | 'DART_ERROR'
  /** Any other cause (connection drop, timeout, unknown). */
  | 'UNKNOWN';

/**
 * Structured error surfaced by FlutterVMInputBackend when the underlying VM
 * Service call fails. Carries the originating `op`, the raw `cause`, and a
 * coarse `code` so callers can attribute and branch on the failure.
 */
export class FlutterVMInputBackendError extends Error {
  readonly name = 'FlutterVMInputBackendError' as const;
  readonly op: 'tap' | 'swipe' | 'typeText' | 'keypress' | 'sendKey';
  readonly cause: unknown;
  readonly code: FlutterVMInputBackendErrorCode;

  /**
   * @param op    Input operation that failed.
   * @param cause Original failure (Error, JSON-RPC error object, string, ...).
   */
  constructor(
    op: FlutterVMInputBackendError['op'],
    cause: unknown,
  ) {
    const code = classifyFlutterVMCause(cause);
    // Code-113 failures carry a verbose JSON-RPC string — replace it with a
    // short, actionable diagnostic instead of echoing the raw payload.
    const userMessage =
      code === 'VM_NO_EVALUATE'
        ? `FlutterVMInputBackend.${op} failed: VM cannot compile expressions ` +
          `(code 113). This app is likely a release build or was launched ` +
          `with \`simctl launch\` instead of \`flutter run\`. ` +
          `Use Tier 1.5 AX press (app_tap_element / app_type_element) or ` +
          `relaunch under \`flutter run --debug\` for full gesture coverage. ` +
          `See docs/ci-recipes.md#qa-ready-flutter-build for the ` +
          `simulator (--debug) and physical-device (--profile) recipes ` +
          `that keep Tier 0 available.`
        : `FlutterVMInputBackend.${op} failed: ${describeFlutterVMCause(cause)}`;
    super(userMessage);
    this.op = op;
    this.cause = cause;
    this.code = code;
    // Keep `instanceof` working when the class is down-levelled.
    Object.setPrototypeOf(this, FlutterVMInputBackendError.prototype);
  }
}

/**
 * Best-effort human-readable text for an arbitrary failure value. Plain
 * objects with a string `message` (e.g. raw JSON-RPC error payloads) yield
 * that message instead of the useless `[object Object]`.
 */
function describeFlutterVMCause(cause: unknown): string {
  if (cause instanceof Error) return cause.message;
  if (cause !== null && typeof cause === 'object') {
    const message = (cause as { message?: unknown }).message;
    if (typeof message === 'string') return message;
  }
  return String(cause);
}

/**
 * Map a failure value onto a `FlutterVMInputBackendErrorCode`.
 *
 * Precedence: code 113 (textual `(code: 113)` in the message, or a numeric
 * `code` property equal to 113) → `VM_NO_EVALUATE`; a `code` property equal to
 * `'DART_ERROR'` → `DART_ERROR`; anything else → `UNKNOWN`.
 */
function classifyFlutterVMCause(cause: unknown): FlutterVMInputBackendErrorCode {
  const code =
    cause !== null && typeof cause === 'object'
      ? (cause as { code?: unknown }).code
      : undefined;
  if (code === 113 || /\(code:\s*113\)/.test(describeFlutterVMCause(cause))) {
    return 'VM_NO_EVALUATE';
  }
  // `evalOrThrow` tags Dart-side errors with code `DART_ERROR`; preserve that
  // classification when bubbling up.
  if (code === 'DART_ERROR') {
    return 'DART_ERROR';
  }
  return 'UNKNOWN';
}
/** Dart-side identifiers needed to build a key event for one key. */
type LogicalKeyEntry = { keyId: string; keyLabel: string; physicalKey: string };

/**
 * Build the entry for a key whose `LogicalKeyboardKey` and
 * `PhysicalKeyboardKey` members share the same name (true for every key in
 * the tables below).
 */
function logicalKey(member: string, keyLabel: string): LogicalKeyEntry {
  return {
    keyId: `LogicalKeyboardKey.${member}`,
    keyLabel,
    physicalKey: `PhysicalKeyboardKey.${member}`,
  };
}

/**
 * HID key-code → Dart `LogicalKeyboardKey` / `PhysicalKeyboardKey`
 * expressions, interpolated into the Dart payload that builds the
 * `KeyDownEvent` / `KeyUpEvent` pair.
 */
const HID_TO_LOGICAL_KEY: Record<string, LogicalKeyEntry> = {
  '40': logicalKey('enter', 'Enter'),
  '41': logicalKey('escape', 'Escape'),
  '42': logicalKey('backspace', 'Backspace'),
  '43': logicalKey('tab', 'Tab'),
  '44': logicalKey('space', ' '),
  '74': logicalKey('home', 'Home'),
  '79': logicalKey('arrowRight', 'ArrowRight'),
  '80': logicalKey('arrowLeft', 'ArrowLeft'),
  '81': logicalKey('arrowDown', 'ArrowDown'),
  '82': logicalKey('arrowUp', 'ArrowUp'),
};

/**
 * Key name (as passed to `sendKey`) → Dart key expressions. `Return`/`Enter`
 * and `Delete`/`Backspace` are aliases for the same Dart key.
 */
const SENDKEY_TO_LOGICAL_KEY: Record<string, LogicalKeyEntry> = {
  Return: logicalKey('enter', 'Enter'),
  Enter: logicalKey('enter', 'Enter'),
  Escape: logicalKey('escape', 'Escape'),
  Tab: logicalKey('tab', 'Tab'),
  Space: logicalKey('space', ' '),
  Delete: logicalKey('backspace', 'Backspace'),
  Backspace: logicalKey('backspace', 'Backspace'),
  Home: logicalKey('home', 'Home'),
  ArrowRight: logicalKey('arrowRight', 'ArrowRight'),
  ArrowLeft: logicalKey('arrowLeft', 'ArrowLeft'),
  ArrowDown: logicalKey('arrowDown', 'ArrowDown'),
  ArrowUp: logicalKey('arrowUp', 'ArrowUp'),
};

/**
 * Format a finite number for interpolation into a Dart expression.
 *
 * @param value Number to format.
 * @param label Argument name used in the error message.
 * @returns `value.toString()`.
 * @throws Error when `value` is NaN or ±Infinity, so the VM Service never
 *         receives a syntactically invalid expression.
 */
function dartNum(value: number, label: string): string {
  if (!Number.isFinite(value)) {
    throw new Error(`Invalid ${label}: ${value} (must be finite)`);
  }
  return value.toString();
}

/**
 * Encode a JS string as a Dart expression that evaluates to the same string.
 *
 * Rather than emitting a quoted literal (which would need escaping for
 * dollar-sign interpolation, backslashes, quotes and newlines), this emits
 * `String.fromCharCodes(const [...])` over the string's UTF-16 code units.
 */
function dartStringLiteral(value: string): string {
  // Index-based on purpose: iterating the string would yield code points,
  // not the code units `charCodeAt` produces.
  const codeUnits = Array.from({ length: value.length }, (_, i) => value.charCodeAt(i));
  return `String.fromCharCodes(const [${codeUnits.join(',')}])`;
}
/**
 * FlutterVMInputBackend — implements the InputBackend contract by evaluating
 * Dart expressions through `FlutterVMClient.evaluate`.
 *
 * Every Dart payload is assembled by concatenating string fragments WITHOUT
 * newlines, so the payload is a single line of Dart. Never put `//` comments
 * inside a fragment: a line comment would swallow the rest of the expression.
 */
export class FlutterVMInputBackend implements InputBackend {
  readonly kind: InputBackendKind = 'flutter-vm';
  private libIdCache: Map<string, string> = new Map();

  constructor(private vmClient: FlutterVMClient) {}

  /**
   * Resolve a Flutter library URI to its VM Service library id, caching the
   * result per URI. Each operation targets a different library so that the
   * required symbols are in lexical scope:
   *   - pointer dispatch → `mouse_tracker.dart`
   *   - text input       → `editable_text.dart`
   *   - key events       → `hardware_keyboard.dart`
   *
   * @throws FlutterVMError `NO_ISOLATE` when the client state has no main
   *         isolate id, `NO_BINDING_LIB` when the isolate does not list `uri`.
   */
  private async resolveLibId(uri: string): Promise<string> {
    const cached = this.libIdCache.get(uri);
    if (cached) return cached;

    const isolateId = this.vmClient.getState()?.mainIsolateId;
    if (!isolateId) {
      throw new FlutterVMError('No main isolate', 'NO_ISOLATE');
    }
    const isolate = await this.vmClient.callMethod('getIsolate', { isolateId });
    const libs =
      (isolate as { libraries?: Array<{ uri?: string; id?: string }> }).libraries ?? [];
    const lib = libs.find((l) => l.uri === uri);
    if (!lib?.id) {
      throw new FlutterVMError(
        `${uri} library not loaded in isolate — is this a Flutter app?`,
        'NO_BINDING_LIB',
      );
    }
    this.libIdCache.set(uri, lib.id);
    return lib.id;
  }

  /**
   * Format a numeric argument for a Dart payload, converting the plain
   * `Error` that `dartNum` raises for NaN / ±Infinity into a
   * `FlutterVMInputBackendError` attributed to `op`.
   */
  private dartArg(
    op: FlutterVMInputBackendError['op'],
    value: number,
    label: string,
  ): string {
    try {
      return dartNum(value, label);
    } catch (err) {
      throw new FlutterVMInputBackendError(op, err);
    }
  }

  /**
   * Synthesise a pointer add → down → up → remove sequence at (`x`, `y`).
   * When `duration` (seconds) is positive, the up/remove events are
   * timestamped `duration * 1000` ms after the down event; otherwise all four
   * events share timestamp zero.
   *
   * @throws FlutterVMInputBackendError on non-finite coordinates, an infinite
   *         duration, or any VM Service failure.
   */
  async tap(
    deviceId: string,
    x: number,
    y: number,
    duration?: number,
  ): Promise<void> {
    await timedInput(this.kind, 'tap', deviceId, () => this.tapInternal(x, y, duration));
  }

  private async tapInternal(
    x: number,
    y: number,
    duration?: number,
  ): Promise<void> {
    const xStr = this.dartArg('tap', x, 'x');
    const yStr = this.dartArg('tap', y, 'y');
    // NaN / zero / negative durations mean "plain tap" (0 ms). An infinite
    // duration would interpolate `Infinity` into the Dart source, so it is
    // rejected by dartArg instead.
    const durMs = duration !== undefined && duration > 0 ? Math.round(duration * 1000) : 0;
    const durMsStr = this.dartArg('tap', durMs, 'duration');

    const expression =
      '(() {' +
      ' final dispatcher = PlatformDispatcher.instance;' +
      ' final view = dispatcher.implicitView;' +
      ' if (view == null) { return false; }' +
      ' final dpr = view.devicePixelRatio;' +
      ` final double px = ${xStr} * dpr;` +
      ` final double py = ${yStr} * dpr;` +
      ` final int downUs = 0;` +
      ` final int upUs = ${durMsStr} * 1000;` +
      ' void dispatch(int tUs, PointerChange change) {' +
      ' final packet = PointerDataPacket(data: [' +
      ' PointerData(' +
      ' timeStamp: Duration(microseconds: tUs),' +
      ' change: change,' +
      ' kind: PointerDeviceKind.touch,' +
      ' device: 1,' +
      ' pointerIdentifier: 1,' +
      ' physicalX: px,' +
      ' physicalY: py,' +
      ' buttons: change == PointerChange.up ? 0 : 1,' +
      ' pressure: change == PointerChange.up ? 0.0 : 1.0,' +
      ' pressureMax: 1.0,' +
      ' ),' +
      ' ]);' +
      ' dispatcher.onPointerDataPacket?.call(packet);' +
      ' }' +
      ' dispatch(downUs, PointerChange.add);' +
      ' dispatch(downUs, PointerChange.down);' +
      ' dispatch(upUs, PointerChange.up);' +
      ' dispatch(upUs, PointerChange.remove);' +
      ' return true;' +
      '})()';

    await this.evalOrThrow('tap', expression, 'package:flutter/src/rendering/mouse_tracker.dart');
  }

  /**
   * Synthesise a drag gesture as add → down → 20×move → up → remove, with the
   * move events linearly interpolated between start and end and spread evenly
   * across `duration` seconds (default 0.5, minimum 1 ms).
   *
   * @throws FlutterVMInputBackendError on non-finite coordinates or duration,
   *         or any VM Service failure.
   */
  async swipe(
    deviceId: string,
    startX: number,
    startY: number,
    endX: number,
    endY: number,
    duration?: number,
  ): Promise<void> {
    await timedInput(this.kind, 'swipe', deviceId, () =>
      this.swipeInternal(startX, startY, endX, endY, duration),
    );
  }

  private async swipeInternal(
    startX: number,
    startY: number,
    endX: number,
    endY: number,
    duration?: number,
  ): Promise<void> {
    const sxStr = this.dartArg('swipe', startX, 'startX');
    const syStr = this.dartArg('swipe', startY, 'startY');
    const exStr = this.dartArg('swipe', endX, 'endX');
    const eyStr = this.dartArg('swipe', endY, 'endY');
    const seconds = duration ?? 0.5;
    // Validation only: a NaN / infinite duration would otherwise end up as
    // `const int stepUs = NaN;` in the Dart source.
    this.dartArg('swipe', seconds, 'duration');
    const totalMs = Math.max(1, Math.round(seconds * 1000));
    const steps = 20;
    const stepUs = Math.round((totalMs * 1000) / steps);

    const expression =
      '(() {' +
      ' final dispatcher = PlatformDispatcher.instance;' +
      ' final view = dispatcher.implicitView;' +
      ' if (view == null) { return false; }' +
      ' final dpr = view.devicePixelRatio;' +
      ` final double sx = ${sxStr} * dpr;` +
      ` final double sy = ${syStr} * dpr;` +
      ` final double ex = ${exStr} * dpr;` +
      ` final double ey = ${eyStr} * dpr;` +
      ` const int steps = ${steps};` +
      ` const int stepUs = ${stepUs};` +
      ' void post(int tUs, PointerChange change, double x, double y) {' +
      ' final packet = PointerDataPacket(data: [' +
      ' PointerData(' +
      ' timeStamp: Duration(microseconds: tUs),' +
      ' change: change,' +
      ' kind: PointerDeviceKind.touch,' +
      ' device: 1,' +
      ' pointerIdentifier: 1,' +
      ' physicalX: x,' +
      ' physicalY: y,' +
      ' buttons: change == PointerChange.up ? 0 : 1,' +
      ' pressure: change == PointerChange.up ? 0.0 : 1.0,' +
      ' pressureMax: 1.0,' +
      ' ),' +
      ' ]);' +
      ' dispatcher.onPointerDataPacket?.call(packet);' +
      ' }' +
      ' post(0, PointerChange.add, sx, sy);' +
      ' post(0, PointerChange.down, sx, sy);' +
      ' for (int i = 1; i <= steps; i++) {' +
      ' final double t = i / steps;' +
      ' final double x = sx + (ex - sx) * t;' +
      ' final double y = sy + (ey - sy) * t;' +
      ' post(stepUs * i, PointerChange.move, x, y);' +
      ' }' +
      ' final int endUs = stepUs * steps;' +
      ' post(endUs, PointerChange.up, ex, ey);' +
      ' post(endUs, PointerChange.remove, ex, ey);' +
      ' return true;' +
      '})()';

    await this.evalOrThrow('swipe', expression, 'package:flutter/src/rendering/mouse_tracker.dart');
  }

  /**
   * Append `text` to the currently focused editable field.
   *
   * Primary path: find the `EditableTextState` above the primary focus and
   * call `userUpdateTextEditingValue` with the existing text plus `text`.
   * Fallback (nothing focused, no editable ancestor, or the primary path
   * threw): send `TextInputClient.updateEditingState` over the
   * `flutter/textinput` channel with client id -1.
   */
  async typeText(deviceId: string, text: string): Promise<void> {
    await timedInput(this.kind, 'typeText', deviceId, () => this.typeTextInternal(text));
  }

  private async typeTextInternal(text: string): Promise<void> {
    const textLit = dartStringLiteral(text);

    // Payload walkthrough (kept here because the Dart source is one line and
    // cannot carry `//` comments):
    //  1. If the primary focus has an EditableTextState ancestor, update it
    //     directly. This avoids needing the private TextInputConnection id.
    //  2. Otherwise deliver the text through the platform channel with a
    //     best-effort client id of -1.
    const expression =
      '(() async {' +
      ` final String newText = ${textLit};` +
      ' int clientId = -1;' +
      ' try {' +
      ' final focused = FocusManager.instance.primaryFocus;' +
      ' if (focused != null && focused.context != null) {' +
      ' final editable = focused.context!.findAncestorStateOfType<EditableTextState>();' +
      ' if (editable != null) {' +
      ' final ctrl = editable.textEditingValue;' +
      ' editable.userUpdateTextEditingValue(' +
      ' TextEditingValue(' +
      ' text: ctrl.text + newText,' +
      ' selection: TextSelection.collapsed(offset: ctrl.text.length + newText.length),' +
      ' ),' +
      ' SelectionChangedCause.keyboard,' +
      ' );' +
      ' return true;' +
      ' }' +
      ' }' +
      ' } catch (_) {}' +
      ' final Map state = {' +
      ' "text": newText,' +
      ' "selectionBase": newText.length,' +
      ' "selectionExtent": newText.length,' +
      ' "selectionAffinity": "TextAffinity.downstream",' +
      ' "selectionIsDirectional": false,' +
      ' "composingBase": -1,' +
      ' "composingExtent": -1,' +
      ' };' +
      ' final message = const JSONMethodCodec().encodeMethodCall(' +
      ' MethodCall("TextInputClient.updateEditingState", [clientId, state]),' +
      ' );' +
      ' await WidgetsBinding.instance!.defaultBinaryMessenger.handlePlatformMessage(' +
      ' "flutter/textinput",' +
      ' message,' +
      ' (dynamic _) {},' +
      ' );' +
      ' return true;' +
      '})()';

    await this.evalOrThrow('typeText', expression, 'package:flutter/src/widgets/editable_text.dart');
  }

  /**
   * Dispatch a HID key code through `HardwareKeyboard`. Only the key codes
   * listed in `HID_TO_LOGICAL_KEY` are supported.
   *
   * @throws Error for an unknown key code.
   */
  async keypress(deviceId: string, keyCode: string): Promise<void> {
    await timedInput(this.kind, 'keypress', deviceId, () => this.keypressInternal(keyCode));
  }

  private async keypressInternal(keyCode: string): Promise<void> {
    const entry = HID_TO_LOGICAL_KEY[keyCode];
    if (!entry) {
      throw new Error(
        `Unknown HID key code "${keyCode}" for FlutterVM backend. ` +
          `Supported: ${Object.keys(HID_TO_LOGICAL_KEY).join(', ')}`,
      );
    }
    await this.dispatchKey('keypress', entry.keyId, entry.keyLabel, entry.physicalKey);
  }

  /**
   * Dispatch a named key ("Return", "Escape", ...) through HardwareKeyboard.
   *
   * @throws Error for a name not present in `SENDKEY_TO_LOGICAL_KEY`.
   */
  async sendKey(deviceId: string, keyName: string): Promise<void> {
    await timedInput(this.kind, 'sendKey', deviceId, () => this.sendKeyInternal(keyName));
  }

  /**
   * Batching is not supported on FlutterVMInputBackend; callers should use
   * `tap()` in a loop for repeated events.
   */
  supportsBatching(): boolean {
    return false;
  }

  private async sendKeyInternal(keyName: string): Promise<void> {
    const entry = SENDKEY_TO_LOGICAL_KEY[keyName];
    if (!entry) {
      throw new Error(
        `Unknown key name "${keyName}" for FlutterVM backend. ` +
          `Supported: ${Object.keys(SENDKEY_TO_LOGICAL_KEY).join(', ')}`,
      );
    }
    await this.dispatchKey('sendKey', entry.keyId, entry.keyLabel, entry.physicalKey);
  }

  // ── internals ──────────────────────────────────────────────────────────

  /**
   * Feed a KeyDownEvent followed by a KeyUpEvent (both at `Duration.zero`)
   * to `HardwareKeyboard.instance.handleKeyEvent`. The down event carries
   * `keyLabel` as its `character` only when the label is a single character.
   */
  private async dispatchKey(
    op: 'keypress' | 'sendKey',
    logicalKeyExpr: string,
    keyLabel: string,
    physicalKeyExpr: string,
  ): Promise<void> {
    const labelLit = dartStringLiteral(keyLabel);
    const expression =
      '(() {' +
      ` final label = ${labelLit};` +
      ` final logical = ${logicalKeyExpr};` +
      ` final physical = ${physicalKeyExpr};` +
      ' final down = KeyDownEvent(' +
      ' physicalKey: physical,' +
      ' logicalKey: logical,' +
      ' timeStamp: Duration.zero,' +
      ' character: label.length == 1 ? label : null,' +
      ' );' +
      ' final up = KeyUpEvent(' +
      ' physicalKey: physical,' +
      ' logicalKey: logical,' +
      ' timeStamp: Duration.zero,' +
      ' );' +
      ' HardwareKeyboard.instance.handleKeyEvent(down);' +
      ' HardwareKeyboard.instance.handleKeyEvent(up);' +
      ' return true;' +
      '})()';

    await this.evalOrThrow(op, expression, 'package:flutter/src/services/hardware_keyboard.dart');
  }

  /**
   * Evaluate `expression` scoped to `libraryUri` and normalise every failure
   * into a `FlutterVMInputBackendError` attributed to `op`. A result whose
   * `type` is `@Error` / `Error` is treated as a Dart-side failure
   * (`DART_ERROR`) even though `evaluate` itself resolved.
   */
  private async evalOrThrow(
    op: FlutterVMInputBackendError['op'],
    expression: string,
    libraryUri: string,
  ): Promise<void> {
    try {
      const targetId = await this.resolveLibId(libraryUri);
      const result = await this.vmClient.evaluate(expression, { targetId });
      const errType = (result as { type?: string }).type;
      if (errType === '@Error' || errType === 'Error') {
        const message =
          (result as { message?: string }).message ?? JSON.stringify(result);
        throw new FlutterVMError(message, 'DART_ERROR');
      }
    } catch (err) {
      if (err instanceof FlutterVMInputBackendError) throw err;
      throw new FlutterVMInputBackendError(op, err);
    }
  }
}
On Xcode 26+ that delegated SimHID swipe path is itself gated + * off and throws `HeadlessInputUnavailableError`; the PointerService + * backend is cached as the selected backend by `getInputBackend`, so the + * throw does NOT re-enter the tier chain. See the comment on `swipe()` + * below and issue #649 for the caller-visible contract. Extending + * `sim-hid-bridge` with pointer-service-bracketed swipe and promoting + * the backend to the default chain are tracked as Phase 2 follow-ups in + * #590. + * + * Moved from `src/tools/pointer-service-input-backend.ts` as part of the + * #707 (b) consolidation. The old path is kept as a re-export shim. + */ + +import { execFile } from 'child_process'; +import { existsSync } from 'fs'; +import * as path from 'path'; +import { promisify } from 'util'; +import type { InputBackend } from './backend'; +import { + SimulatorKitHIDInputBackend, + InputBackendError, +} from './sim-hid-backend'; +import { timedInput } from '../metrics/input-telemetry'; + +const execFileAsync = promisify(execFile); + +/** + * Env flag that enables the PointerService backend (Phase 1 opt-in). + * Accepted values: `1`, `true`. Anything else is ignored. + */ +export const OPENSAFARI_ENABLE_POINTERSERVICE_ENV = + 'OPENSAFARI_ENABLE_POINTERSERVICE'; + +export function isPointerServiceEnabled(): boolean { + const value = process.env[OPENSAFARI_ENABLE_POINTERSERVICE_ENV]; + return value === '1' || value === 'true'; +} + +/** + * PointerService tap + delegated swipe/keys. + * + * Composes a `SimulatorKitHIDInputBackend` for the subset of methods that + * still share the default `sim-hid-bridge` subcommand set, and shells out + * directly to `sim-hid-bridge tap-ps ...` for `tap`. 
/**
 * PointerService tap + delegated swipe/keys.
 *
 * `tap` shells out to `sim-hid-bridge tap-ps`; every other input method is
 * forwarded unchanged to the wrapped `SimulatorKitHIDInputBackend`.
 */
export class PointerServiceInputBackend implements InputBackend {
  readonly kind = 'pointer-service' as const;

  /**
   * @param bridgePath Path to the `sim-hid-bridge` binary or `.swift` source.
   * @param delegate   Backend that handles swipe / typeText / keypress / sendKey.
   */
  constructor(
    private readonly bridgePath: string,
    private readonly delegate: SimulatorKitHIDInputBackend,
  ) {}

  /**
   * Tap at (`x`, `y`) via `tap-ps`. `duration` is appended to the argv only
   * when it is a positive number.
   */
  async tap(deviceId: string, x: number, y: number, duration?: number): Promise<void> {
    await timedInput(this.kind, 'tap', deviceId, async () => {
      const args = [deviceId, 'tap-ps', String(x), String(y)];
      if (duration !== undefined && duration > 0) {
        args.push(String(duration));
      }
      await runTapPs(this.bridgePath, args);
    });
  }

  /**
   * Forward the swipe to the delegate.
   *
   * Phase 1 has no `swipe-ps` subcommand, so whatever the delegate throws
   * propagates to the caller unchanged — this method adds no wrapping and no
   * fallback of its own. Callers that need a swipe fallback must either leave
   * OPENSAFARI_ENABLE_POINTERSERVICE unset or use an element-targeted swipe.
   * A real in-tool downgrade is tracked under #590 Phase 2; see #649 for the
   * decision record.
   */
  async swipe(
    deviceId: string,
    startX: number,
    startY: number,
    endX: number,
    endY: number,
    duration?: number,
  ): Promise<void> {
    await this.delegate.swipe(deviceId, startX, startY, endX, endY, duration);
  }

  /** Forwarded to the delegate unchanged. */
  async typeText(deviceId: string, text: string): Promise<void> {
    await this.delegate.typeText(deviceId, text);
  }

  /** Forwarded to the delegate unchanged. */
  async keypress(deviceId: string, keyCode: string): Promise<void> {
    await this.delegate.keypress(deviceId, keyCode);
  }

  /** Forwarded to the delegate unchanged. */
  async sendKey(deviceId: string, keyName: string): Promise<void> {
    await this.delegate.sendKey(deviceId, keyName);
  }

  /**
   * Batching is not supported on PointerServiceInputBackend; callers that
   * need repeated taps must invoke `tap()` in a loop.
   */
  supportsBatching(): boolean {
    return false;
  }
}

/** Wall-clock limit for one `sim-hid-bridge tap-ps` invocation. */
const TAP_PS_TIMEOUT_MS = 10_000;

/** Maximum stdout / stderr captured from the bridge (1 MiB). */
const TAP_PS_MAX_BUFFER_BYTES = 1 * 1024 * 1024;

/**
 * Execute `sim-hid-bridge` with the given argv.
 *
 * A `.swift` bridge path is run through the `swift` interpreter; anything
 * else is executed directly.
 *
 * @throws InputBackendError `UNKNOWN` when the process fails to run or exits
 *         non-zero, or when it prints JSON with `ok: false`;
 *         `JSON_PARSE_FAILURE` when non-empty stdout is not valid JSON.
 *         Empty stdout after a clean exit counts as success.
 */
async function runTapPs(bridgePath: string, args: string[]): Promise<void> {
  const isSwiftSource = bridgePath.endsWith('.swift');
  const cmd = isSwiftSource ? 'swift' : bridgePath;
  const cmdArgs = isSwiftSource ? [bridgePath, ...args] : args;

  let stdout = '';
  let stderr = '';
  try {
    const result = await execFileAsync(cmd, cmdArgs, {
      timeout: TAP_PS_TIMEOUT_MS,
      maxBuffer: TAP_PS_MAX_BUFFER_BYTES,
    });
    stdout = result.stdout ?? '';
    stderr = result.stderr ?? '';
  } catch (err) {
    const e = err as NodeJS.ErrnoException & {
      stdout?: string;
      stderr?: string;
      code?: number | string;
      killed?: boolean;
    };
    stdout = e.stdout ?? '';
    stderr = e.stderr ?? '';
    // Only a numeric `code` is a process exit status; string codes (spawn
    // errors) are reported as `?`.
    const exit = typeof e.code === 'number' ? e.code : undefined;
    const hint = stderr.trim() || stdout.trim() || e.message;
    throw new InputBackendError(
      `sim-hid-bridge tap-ps exited ${exit ?? '?'}: ${hint}`,
      'UNKNOWN',
      stderr,
    );
  }

  if (!stdout.trim()) return;
  try {
    const parsed = JSON.parse(stdout) as { ok?: boolean; error?: string };
    if (parsed.ok === false) {
      throw new InputBackendError(
        parsed.error ?? 'sim-hid-bridge tap-ps reported ok=false',
        'UNKNOWN',
        stderr,
      );
    }
  } catch (err) {
    // Re-throw our own ok=false error; anything else is a parse failure.
    if (err instanceof InputBackendError) throw err;
    // Truncate and strip control characters before echoing untrusted output.
    const safeStdout = stdout
      .slice(0, 200)
      .replace(/[\x00-\x1f\x7f]/g, '?');
    throw new InputBackendError(
      `sim-hid-bridge tap-ps produced non-JSON stdout: ${safeStdout}`,
      'JSON_PARSE_FAILURE',
      stderr,
    );
  }
}
/**
 * Factory mirroring `tryCreateSimulatorKitHIDBackend`: locate a usable
 * `sim-hid-bridge` helper and wrap it as a `PointerServiceInputBackend`.
 * Returns `null` when the helper is not installed — callers are expected
 * to fall through to the default SimHID tier.
 *
 * Candidates are probed in order: the compiled `sim-hid-bridge` one directory
 * up and then beside this module, followed by `sim-hid-bridge.swift` in the
 * same two locations. The source-tree copy under `src/native/` is considered
 * only when `OPENSAFARI_ALLOW_SWIFT_INTERPRETER=1`.
 *
 * @returns A backend bound to the first existing candidate, or `null`.
 */
export async function tryCreatePointerServiceBackend(): Promise<PointerServiceInputBackend | null> {
  const candidates = [
    path.resolve(__dirname, '..', 'sim-hid-bridge'),
    path.resolve(__dirname, 'sim-hid-bridge'),
    path.resolve(__dirname, '..', 'sim-hid-bridge.swift'),
    path.resolve(__dirname, 'sim-hid-bridge.swift'),
  ];
  if (process.env.OPENSAFARI_ALLOW_SWIFT_INTERPRETER === '1') {
    candidates.push(
      path.resolve(__dirname, '..', '..', 'src', 'native', 'sim-hid-bridge.swift'),
    );
  }

  const bridgePath = candidates.find((candidate) => existsSync(candidate));
  if (bridgePath === undefined) {
    return null;
  }
  // The delegate and the tap-ps path share the same bridge location.
  const delegate = new SimulatorKitHIDInputBackend(bridgePath);
  return new PointerServiceInputBackend(bridgePath, delegate);
}
+ * + * Moved from `src/tools/sim-hid-input-backend.ts` as part of the #707 (b) + * consolidation. The old path is kept as a re-export shim for compatibility. + */ + +import { execFile } from 'child_process'; +import { promisify } from 'util'; +import { existsSync } from 'fs'; +import * as path from 'path'; +import type { InputBackend, BatchTapEvent } from './backend'; +import { timedInput } from '../metrics/input-telemetry'; + +const execFileAsync = promisify(execFile); + +/** Reference appended to error messages for private-framework failures. */ +const PRIVATE_API_DOC_REF = 'See docs/private-apis.md'; + +/** Latch so the private-API warning is emitted only once per process. */ +let warnedAboutPrivateAPI = false; + +/** + * Reset the private-API warning latch. Exported for unit tests only — do not + * call from production code. + */ +export function resetSimHidPrivateAPIWarning(): void { + warnedAboutPrivateAPI = false; +} + +/** Spawn timeout for the Swift helper. Matches idb's default. */ +const SPAWN_TIMEOUT_MS = 10_000; + + +/** HID usage page 0x07 (Keyboard/Keypad) — subset we map for pressKey(). */ +const KEY_NAME_TO_HID_USAGE: Record = { + Enter: 0x28, + Return: 0x28, + Escape: 0x29, + Backspace: 0x2a, + Delete: 0x2a, + Tab: 0x2b, + Space: 0x2c, + ArrowRight: 0x4f, + ArrowLeft: 0x50, + ArrowDown: 0x51, + ArrowUp: 0x52, + Home: 0x4a, +}; + +/** + * HID usage of the LeftShift modifier (Keyboard/Keypad page 0x07). + * Sent alongside a character key via the bridge's `key-mod` subcommand for + * every ASCII symbol that requires Shift on a US keyboard (uppercase letters, + * `!@#$%^&*()_+{}|:"<>?~`). + */ +const HID_USAGE_LEFT_SHIFT = 0xe1; + +/** + * US-keyboard printable ASCII → HID usage + whether Shift must be held. + * + * Covers U+0020 (space) through U+007E (tilde) — i.e. every character produced + * by a US layout without dead keys or IME. Returns null for everything else, + * including control characters (tab, newline), DEL, and any non-ASCII byte. 
/**
 * Lookup table for the non-alphanumeric printable ASCII characters of a US
 * keyboard: space, the eleven punctuation keys in both plain and shifted
 * form, and the shifted digit row (`!` through `)`).
 */
const US_SYMBOL_HID_KEYS: ReadonlyMap<string, { readonly usage: number; readonly shift: boolean }> =
  (() => {
    const table = new Map<string, { usage: number; shift: boolean }>();
    table.set(' ', { usage: 0x2c, shift: false });

    // [HID usage, unshifted character, shifted character]
    const punctuationKeys: ReadonlyArray<readonly [number, string, string]> = [
      [0x2d, '-', '_'],
      [0x2e, '=', '+'],
      [0x2f, '[', '{'],
      [0x30, ']', '}'],
      [0x31, '\\', '|'],
      [0x33, ';', ':'],
      [0x34, "'", '"'],
      [0x35, '`', '~'],
      [0x36, ',', '<'],
      [0x37, '.', '>'],
      [0x38, '/', '?'],
    ];
    for (const [usage, plain, shifted] of punctuationKeys) {
      table.set(plain, { usage, shift: false });
      table.set(shifted, { usage, shift: true });
    }

    // Shift + '1'..'9','0' occupy the same keys as the digits: 0x1E..0x27.
    [...'!@#$%^&*()'].forEach((symbol, index) => {
      table.set(symbol, { usage: 0x1e + index, shift: true });
    });
    return table;
  })();

/**
 * US-keyboard printable ASCII → HID usage + whether Shift must be held.
 *
 * Covers U+0020 (space) through U+007E (tilde) — i.e. every character produced
 * by a US layout without dead keys or IME. Returns null for everything else,
 * including control characters (tab, newline), DEL, and any non-ASCII byte.
 *
 * Reference: USB HID Usage Tables v1.21, §10 Keyboard/Keypad (page 0x07).
 *
 * @param ch A single UTF-16 code unit; any other length yields `null`.
 * @returns A freshly allocated `{ usage, shift }` pair, or `null` when unmapped.
 */
function asciiToHidKey(ch: string): { usage: number; shift: boolean } | null {
  if (ch.length !== 1) return null;

  const code = ch.charCodeAt(0);
  const isPrintableAscii = code >= 0x20 && code <= 0x7e;
  if (!isPrintableAscii) return null;

  // Letters: 'a'/'A' start at usage 0x04; uppercase uses the same key + Shift.
  const isLower = code >= 0x61 && code <= 0x7a;
  const isUpper = code >= 0x41 && code <= 0x5a;
  if (isLower || isUpper) {
    const base = isLower ? 0x61 : 0x41;
    return { usage: 0x04 + (code - base), shift: isUpper };
  }

  // Digits: '1'..'9' map to 0x1E..0x26, and '0' follows them at 0x27.
  if (code >= 0x30 && code <= 0x39) {
    const usage = code === 0x30 ? 0x27 : 0x1e + (code - 0x31);
    return { usage, shift: false };
  }

  const symbol = US_SYMBOL_HID_KEYS.get(ch);
  if (symbol === undefined) return null;
  return { usage: symbol.usage, shift: symbol.shift };
}

/**
 * Error emitted by `SimulatorKitHIDInputBackend`. Mirrors the convention used
 * by `AccessibilityBridgeError` (see `src/native/accessibility-bridge.ts`):
 * a stable machine-readable `code` plus the human-readable `message`.
 * `stderr`, when provided, carries the bridge's captured standard error.
 */
export class InputBackendError extends Error {
  readonly name = 'InputBackendError' as const;

  constructor(
    message: string,
    public readonly code: InputBackendErrorCode,
    public readonly stderr?: string,
  ) {
    super(message);
    // Pin the prototype explicitly so `instanceof InputBackendError` holds.
    Object.setPrototypeOf(this, InputBackendError.prototype);
  }
}

/** Machine-readable failure categories carried by `InputBackendError.code`. */
export type InputBackendErrorCode =
  | 'BAD_ARGS'
  | 'DEVICE_NOT_BOOTED'
  | 'SIMULATORKIT_UNAVAILABLE'
  | 'NOT_IMPLEMENTED'
  | 'SPAWN_TIMEOUT'
  | 'BRIDGE_NOT_FOUND'
  | 'HID_BRIDGE_MISSING'
  | 'JSON_PARSE_FAILURE'
  | 'UNKNOWN';

/** Swift bridge exit statuses that have a dedicated error code. */
const EXIT_STATUS_TO_ERROR_CODE: ReadonlyMap<number | undefined, InputBackendErrorCode> = new Map<
  number | undefined,
  InputBackendErrorCode
>([
  [64, 'BAD_ARGS'],
  [69, 'DEVICE_NOT_BOOTED'],
  [78, 'SIMULATORKIT_UNAVAILABLE'],
  [99, 'NOT_IMPLEMENTED'],
]);

/** Map Swift bridge exit codes to structured error codes; anything else is `UNKNOWN`. */
function codeForExit(exit: number | undefined): InputBackendErrorCode {
  return EXIT_STATUS_TO_ERROR_CODE.get(exit) ?? 'UNKNOWN';
}
/** JSON status envelope that `sim-hid-bridge` prints on stdout. */
type SimHidBridgeEnvelope = { ok?: boolean; error?: string; code?: string };

/**
 * Envelope `code` values (from an `ok: false` reply) whose error message gets
 * the private-API doc pointer appended.
 */
const FRAMEWORK_FAILURE_CODES: ReadonlySet<string> = new Set([
  'SIMULATORKIT_MISSING',
  'CORESIMULATOR_MISSING',
  'HID_CLIENT_FAILED',
  'HID_FUNCTIONS_MISSING',
]);

/**
 * Look up a symbolic key name in `KEY_NAME_TO_HID_USAGE`, considering own
 * properties only. A plain index would resolve inherited names such as
 * "constructor" or "toString" to `Object.prototype` members, which would then
 * be stringified and sent to the bridge as if they were HID usages.
 */
function hidUsageForKeyName(keyName: string): number | undefined {
  return Object.prototype.hasOwnProperty.call(KEY_NAME_TO_HID_USAGE, keyName)
    ? KEY_NAME_TO_HID_USAGE[keyName]
    : undefined;
}

/**
 * SimulatorKit HID input backend. Spawns `sim-hid-bridge` per call and parses
 * the JSON status envelope. All methods throw `InputBackendError` on failure.
 */
export class SimulatorKitHIDInputBackend implements InputBackend {
  readonly kind = 'simhid' as const;

  /**
   * @param bridgePath Path to the compiled bridge, or to its `.swift` source
   *   (run through the `swift` interpreter — see `resolveSpawn`).
   */
  constructor(private readonly bridgePath: string) {}

  /**
   * Tap at (`x`, `y`) via the bridge's `tap` subcommand. `duration` is passed
   * as an extra argument only when it is positive.
   */
  async tap(deviceId: string, x: number, y: number, duration?: number): Promise<void> {
    await timedInput(this.kind, 'tap', deviceId, async () => {
      const args = [deviceId, 'tap', String(x), String(y)];
      if (duration !== undefined && duration > 0) {
        args.push(String(duration));
      }
      await this.run(args);
    });
  }

  /**
   * Swipe from (`startX`, `startY`) to (`endX`, `endY`) via the bridge's
   * `swipe` subcommand. `duration` is passed only when it is positive.
   */
  async swipe(
    deviceId: string,
    startX: number,
    startY: number,
    endX: number,
    endY: number,
    duration?: number,
  ): Promise<void> {
    await timedInput(this.kind, 'swipe', deviceId, async () => {
      const args = [
        deviceId, 'swipe',
        String(startX), String(startY),
        String(endX), String(endY),
      ];
      if (duration !== undefined && duration > 0) {
        args.push(String(duration));
      }
      await this.run(args);
    });
  }

  /**
   * Type `text` one character at a time as HID key events.
   *
   * @param delayMs Pause inserted between consecutive characters (default 0).
   * @throws InputBackendError with code `BAD_ARGS` at the first character that
   *   has no HID mapping; characters before it have already been sent.
   */
  async typeText(deviceId: string, text: string, delayMs = 0): Promise<void> {
    await timedInput(this.kind, 'typeText', deviceId, async () => {
      // Printable US-ASCII only. Each character is mapped to a US-keyboard
      // HID usage and sent as an independent event. Shifted characters
      // (uppercase letters, symbols like `@!#$%^&*()_+{}|:"<>?~`) are sent
      // via the bridge's `key-mod` subcommand which holds LeftShift around
      // the key press. Tab, newline, DEL, and non-ASCII characters have no
      // mapping and are rejected; higher layers should compose those via
      // WebKit/Flutter/simctl backends instead.
      //
      // When delayMs > 0 an inter-character pause is inserted between
      // consecutive key sends. This is required for segmented OTP-style
      // inputs (e.g. 6-cell verify-code fields in Flutter) that drop
      // characters when keys arrive in rapid succession (issue #639).
      let first = true;
      for (const ch of text) {
        const key = asciiToHidKey(ch);
        if (key === null) {
          throw new InputBackendError(
            `SimulatorKitHIDInputBackend.typeText: unsupported character '${ch}' ` +
              '(no HID mapping). Only printable US-ASCII (U+0020..U+007E) is ' +
              'supported; tab, newline, and non-ASCII characters are not. ' +
              'Track follow-up in issue #483.',
            'BAD_ARGS',
          );
        }
        if (!first && delayMs > 0) {
          await sleep(delayMs);
        }
        first = false;
        if (key.shift) {
          await this.run([
            deviceId,
            'key-mod',
            String(key.usage),
            String(HID_USAGE_LEFT_SHIFT),
          ]);
        } else {
          await this.run([deviceId, 'key', String(key.usage)]);
        }
      }
    });
  }

  /**
   * Press a single key identified by a decimal HID usage code or by a key name
   * from `KEY_NAME_TO_HID_USAGE`.
   *
   * @throws InputBackendError with code `BAD_ARGS` for an unknown key name.
   */
  async keypress(deviceId: string, keyCode: string): Promise<void> {
    await timedInput(this.kind, 'keypress', deviceId, async () => {
      // Accept either a decimal HID usage code or a key name known to our map.
      const parsed = Number.parseInt(keyCode, 10);
      const usage = Number.isNaN(parsed) ? hidUsageForKeyName(keyCode) : parsed;
      if (usage === undefined) {
        throw new InputBackendError(
          `SimulatorKitHIDInputBackend.keypress: unknown HID key code "${keyCode}"`,
          'BAD_ARGS',
        );
      }
      await this.run([deviceId, 'key', String(usage)]);
    });
  }

  /**
   * Press the key named `keyName` (a key of `KEY_NAME_TO_HID_USAGE`).
   *
   * @throws InputBackendError with code `BAD_ARGS` for an unknown key name.
   */
  async sendKey(deviceId: string, keyName: string): Promise<void> {
    await timedInput(this.kind, 'sendKey', deviceId, async () => {
      const usage = hidUsageForKeyName(keyName);
      if (usage === undefined) {
        throw new InputBackendError(
          `SimulatorKitHIDInputBackend.pressKey: unknown key "${keyName}". ` +
            `Supported: ${Object.keys(KEY_NAME_TO_HID_USAGE).join(', ')}`,
          'BAD_ARGS',
        );
      }
      await this.run([deviceId, 'key', String(usage)]);
    });
  }

  /** Convenience alias: resolve a symbolic key name to its HID usage. */
  async pressKey(deviceId: string, key: string): Promise<void> {
    await this.sendKey(deviceId, key);
  }

  /**
   * SimulatorKitHIDInputBackend supports batching. Each `sim-hid-bridge`
   * invocation is a short-lived child-process spawn (~10–50 ms of OS
   * overhead on a typical macOS host). When a caller needs to dispatch N
   * taps in quick succession (e.g. rapidly filling a PIN pad), using
   * `tapBatch()` allows it to express intent at the logical-batch level
   * rather than calling `tap()` N times in a loop — which is identical at
   * the wire level but explicitly communicates that the events form a unit.
   * Future optimisations (e.g. a single-spawn batch subcommand in the
   * Swift bridge, if added later) can be transparently wired in here
   * without changing callers.
   *
   * **Limitation**: only tap events are supported. Batching swipe, key, or
   * key-mod events is not implemented because the use-case is less
   * frequent and the bridge does not yet expose a multi-command subcommand
   * that covers those event types. See issue #705 for the follow-up scope.
   */
  supportsBatching(): boolean {
    return true;
  }

  /**
   * Dispatch multiple tap events sequentially. Events are sent in order;
   * if any event fails the batch stops and rejects with that error.
   *
   * Spawn count before this API: N calls × 1 spawn each = N spawns.
   * Spawn count after using tapBatch: N spawns (same at the bridge level,
   * because the Swift bridge handles one command per process). The benefit
   * is reduced caller overhead (no repeated `getInputBackend()` resolution,
   * no per-event telemetry scaffolding outside the batch boundary) and a
   * clear extension point when the bridge gains a batch subcommand.
   *
   * **Not supported** for swipe, typeText, keypress, or sendKey — those
   * operations use separate bridge subcommands that are not yet batched.
   * Callers that mix event types must call the individual methods directly.
   */
  async tapBatch(deviceId: string, events: BatchTapEvent[]): Promise<void> {
    for (const event of events) {
      await this.tap(deviceId, event.x, event.y, event.duration);
    }
  }

  /**
   * Press `keyUsage` while holding `modifierUsage` (e.g. Cmd+V = keyChord(25, 227)).
   * Wraps the bridge's `key-mod` subcommand so callers can compose chords
   * without shelling out manually. Used by the pasteboard typing path.
   */
  async keyChord(
    deviceId: string,
    keyUsage: number,
    modifierUsage: number,
  ): Promise<void> {
    await timedInput(this.kind, 'keyChord', deviceId, async () => {
      await this.run([
        deviceId,
        'key-mod',
        String(keyUsage),
        String(modifierUsage),
      ]);
    });
  }

  /**
   * Spawn the bridge with the given argv (not including the bridge path)
   * and parse its JSON stdout. Surfaces every documented exit code as a
   * structured `InputBackendError`.
   *
   * @returns The parsed envelope, or `{}` when the bridge printed nothing.
   * @throws InputBackendError with `SPAWN_TIMEOUT` when the child was killed
   *   with no exit code, the code from `codeForExit` for other spawn failures,
   *   the envelope's own `code` (default `UNKNOWN`) for an `ok: false` reply,
   *   or `JSON_PARSE_FAILURE` for non-JSON stdout.
   */
  private async run(args: string[]): Promise<SimHidBridgeEnvelope> {
    // One-time, process-wide notice about the private frameworks in use.
    if (!warnedAboutPrivateAPI) {
      warnedAboutPrivateAPI = true;
      console.error(
        '[opensafari] SimulatorKitHIDInputBackend uses private Apple frameworks ' +
          '(SimulatorKit.framework, CoreSimulator.framework) via dlopen. ' +
          'These APIs are undocumented and Xcode updates may break them. ' +
          'Where can I use this? macOS host / CI only — never bundle inside an ' +
          'iOS .ipa shipped to the App Store or TestFlight. ' +
          PRIVATE_API_DOC_REF +
          ' (see "Deployment scope").',
      );
    }
    const { cmd, cmdArgs } = this.resolveSpawn(args);
    let stdout = '';
    let stderr = '';
    try {
      const result = await execFileAsync(cmd, cmdArgs, {
        timeout: SPAWN_TIMEOUT_MS,
        maxBuffer: 1 * 1024 * 1024,
      });
      stdout = result.stdout ?? '';
      stderr = result.stderr ?? '';
    } catch (err) {
      const e = err as NodeJS.ErrnoException & {
        stdout?: string;
        stderr?: string;
        code?: number | string;
        killed?: boolean;
      };
      stdout = e.stdout ?? '';
      stderr = e.stderr ?? '';

      // Killed by the parent with no exit status: treated as the spawn timeout.
      if (e.killed && e.code === null) {
        throw new InputBackendError(
          `sim-hid-bridge timed out after ${SPAWN_TIMEOUT_MS}ms`,
          'SPAWN_TIMEOUT',
          stderr,
        );
      }

      const exit = typeof e.code === 'number' ? e.code : undefined;
      const classified = codeForExit(exit);
      const hint = stderr.trim() || stdout.trim() || e.message;
      // Attach the private-APIs doc pointer to every SimulatorKit-layer
      // failure so MCP clients / CI logs link directly to the BC-break
      // response playbook rather than surfacing a bare exit code.
      const docSuffix =
        classified === 'SIMULATORKIT_UNAVAILABLE' || classified === 'NOT_IMPLEMENTED'
          ? ` (${PRIVATE_API_DOC_REF})`
          : '';
      throw new InputBackendError(
        `sim-hid-bridge exited ${exit ?? '?'}: ${hint}${docSuffix}`,
        classified,
        stderr,
      );
    }

    // Successful spawn: parse the JSON envelope. A bridge that exits 0 but
    // emits `{ ok: false, ... }` is treated as a structured failure too.
    if (!stdout.trim()) {
      return {};
    }
    try {
      const parsed = JSON.parse(stdout) as SimHidBridgeEnvelope;
      if (parsed.ok === false) {
        const okFalseCode = (parsed.code as InputBackendErrorCode | undefined) ?? 'UNKNOWN';
        const okFalseDocSuffix = FRAMEWORK_FAILURE_CODES.has(parsed.code ?? '')
          ? ` (${PRIVATE_API_DOC_REF})`
          : '';
        throw new InputBackendError(
          `${parsed.error ?? 'sim-hid-bridge reported ok=false'}${okFalseDocSuffix}`,
          okFalseCode,
          stderr,
        );
      }
      return parsed;
    } catch (err) {
      if (err instanceof InputBackendError) throw err;
      const safeStdout = stdout
        .slice(0, 200)
        // Strip ASCII control / DEL so a crafted bridge payload can't inject
        // ANSI escapes or JSON-RPC framing into MCP server logs.
        .replace(/[\x00-\x1f\x7f]/g, '?');
      throw new InputBackendError(
        `sim-hid-bridge produced non-JSON stdout: ${safeStdout}`,
        'JSON_PARSE_FAILURE',
        stderr,
      );
    }
  }

  /**
   * Decide how to invoke the bridge: as a compiled binary, or via the `swift`
   * interpreter when only the .swift source is present (PoC fallback).
   */
  private resolveSpawn(args: string[]): { cmd: string; cmdArgs: string[] } {
    if (this.bridgePath.endsWith('.swift')) {
      return { cmd: 'swift', cmdArgs: [this.bridgePath, ...args] };
    }
    return { cmd: this.bridgePath, cmdArgs: args };
  }
}

/**
 * Attempt to locate a usable sim-hid-bridge. Returns a ready-to-use backend,
 * or throws an `InputBackendError` with code `HID_BRIDGE_MISSING` when no
 * helper is installed on this machine. Callers in the resolver chain are
 * expected to catch that error and fall through to the next tier; callers
 * that always require the bridge (e.g. `pasteboard-input`) propagate it.
 *
 * Lookup order:
 *   1. Compiled binary at `dist/sim-hid-bridge` (next to `dist/ax-bridge`).
 *   2. Swift source at `dist/sim-hid-bridge.swift` (post-build copy).
 *   3. Source tree fallback at `src/native/sim-hid-bridge.swift` — DEV ONLY,
 *      gated behind `OPENSAFARI_ALLOW_SWIFT_INTERPRETER=1`. The repo-relative
 *      path escapes `dist/` when the package is installed as a dependency,
 *      and executing unsigned Swift source via the interpreter sidesteps any
 *      future codesigning we add to the compiled binary, so this candidate
 *      is intentionally NOT auto-discovered in production installs.
 *
 * Note: the return type still includes `null` for forward compatibility with
 * a future tier-fallback variant that prefers null over throwing. Today the
 * function only resolves to a backend or throws.
 */
export async function tryCreateSimulatorKitHIDBackend(): Promise<
  SimulatorKitHIDInputBackend | null
> {
  const candidates = [
    // Compiled binary co-located with ax-bridge after build.
    path.resolve(__dirname, '..', 'sim-hid-bridge'),
    path.resolve(__dirname, 'sim-hid-bridge'),
    // Swift source copied into dist/ by the postbuild step.
    path.resolve(__dirname, '..', 'sim-hid-bridge.swift'),
    path.resolve(__dirname, 'sim-hid-bridge.swift'),
  ];
  if (process.env.OPENSAFARI_ALLOW_SWIFT_INTERPRETER === '1') {
    candidates.push(
      path.resolve(__dirname, '..', '..', 'src', 'native', 'sim-hid-bridge.swift'),
    );
  }
  for (const candidate of candidates) {
    if (existsSync(candidate)) {
      return new SimulatorKitHIDInputBackend(candidate);
    }
  }
  const searched = candidates.map((c) => ` - ${c}`).join('\n');
  throw new InputBackendError(
    `sim-hid-bridge not found. Searched:\n${searched}\n` +
      'Run npm run build or set OPENSAFARI_ALLOW_SWIFT_INTERPRETER=1 for dev mode.',
    'HID_BRIDGE_MISSING',
  );
}
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// ---- src/input/simctl-backend.ts ----

/**
 * SimctlInputBackend — uses `xcrun simctl io input` subcommands.
 *
 * Available on Xcode versions that ship the `input` subcommand (typically ≤ 16).
 *
 * Split from `src/tools/native-input-backend.ts` as part of the #707 (a)
 * refactor. Behavior is strictly unchanged.
 */
export class SimctlInputBackend implements InputBackend {
  readonly kind = 'simctl' as const;
  private simctl: SimctlExecutor;

  /** @param simctl Executor to use; a new `SimctlExecutor` is created when omitted. */
  constructor(simctl?: SimctlExecutor) {
    this.simctl = simctl ?? new SimctlExecutor();
  }

  /**
   * Tap at (`x`, `y`). A positive `duration` switches from `input tap` to
   * `input press`, which receives the duration as its last argument.
   */
  async tap(deviceId: string, x: number, y: number, duration?: number): Promise<void> {
    await timedInput(this.kind, 'tap', deviceId, async () => {
      if (duration && duration > 0) {
        await this.simctl.exec([
          'io', deviceId, 'input', 'press',
          String(x), String(y), String(duration),
        ]);
      } else {
        await this.simctl.exec(['io', deviceId, 'input', 'tap', String(x), String(y)]);
      }
    });
  }

  /**
   * Swipe via `input swipe`; if that call rejects for any reason, retry with
   * `input drag`. `duration` is only used by the `drag` fallback (default 0.5).
   */
  async swipe(
    deviceId: string,
    startX: number, startY: number,
    endX: number, endY: number,
    duration?: number,
  ): Promise<void> {
    await timedInput(this.kind, 'swipe', deviceId, async () => {
      try {
        await this.simctl.exec([
          'io', deviceId, 'input', 'swipe',
          String(startX), String(startY), String(endX), String(endY),
        ]);
      } catch {
        // Fallback: `drag` accepts a duration argument
        await this.simctl.exec([
          'io', deviceId, 'input', 'drag',
          String(startX), String(startY), String(endX), String(endY),
          String(duration ?? 0.5),
        ]);
      }
    });
  }

  /** Type `text` in a single `input text` call; `_delayMs` is accepted but ignored. */
  async typeText(deviceId: string, text: string, _delayMs?: number): Promise<void> {
    await timedInput(this.kind, 'typeText', deviceId, async () => {
      await this.simctl.exec(['io', deviceId, 'input', 'text', text]);
    });
  }

  /** Forward `keyCode` unchanged to `input keypress`. */
  async keypress(deviceId: string, keyCode: string): Promise<void> {
    await timedInput(this.kind, 'keypress', deviceId, async () => {
      await this.simctl.exec(['io', deviceId, 'input', 'keypress', keyCode]);
    });
  }

  /** Forward `keyName` unchanged to `simctl io <device> sendkey`. */
  async sendKey(deviceId: string, keyName: string): Promise<void> {
    await timedInput(this.kind, 'sendKey', deviceId, async () => {
      await this.simctl.exec(['io', deviceId, 'sendkey', keyName]);
    });
  }

  /**
   * Batching is not supported on SimctlInputBackend. Each `xcrun simctl io
   * input` invocation opens a separate Xcode IPC channel; accumulating calls
   * at the TypeScript level would not reduce that per-call overhead.
   */
  supportsBatching(): boolean {
    return false;
  }
}

// ---- src/input/webkit-backend.ts ----

/**
 * WebKitInputBackend — uses WebKit Remote Debugging Protocol (JavaScript touch events).
 *
 * Completely focus-free — communicates over a TCP socket, so the Simulator
 * window does not need to be in the foreground.
 *
 * Limitations:
 * - Only works when Safari/WebView is connected via WebKit protocol
 * - Touch events dispatched via JS have `isTrusted: false`, so native
 *   scroll is supplemented with an explicit `window.scrollBy()` call
 *
 * Split from `src/tools/native-input-backend.ts` as part of the #707 (a)
 * refactor. Behavior is strictly unchanged.
 */

/**
 * HID key-code → standard key name mapping for WebKit `press()`.
 */
export const HID_TO_WEBKIT_KEY: Record<string, string> = {
  '40': 'Enter',
  '41': 'Escape',
  '42': 'Backspace',
  '43': 'Tab',
  '44': 'Space',
  '74': 'Home',
  '79': 'ArrowRight',
  '80': 'ArrowLeft',
  '81': 'ArrowDown',
  '82': 'ArrowUp',
};

/**
 * Named key → WebKit `press()` key name mapping.
 */
export const SENDKEY_TO_WEBKIT_KEY: Record<string, string> = {
  Return: 'Enter',
  Escape: 'Escape',
  Tab: 'Tab',
  Space: 'Space',
  Delete: 'Backspace',
  Home: 'Home',
};
/**
 * Uses WebKit Remote Debugging Protocol (JavaScript touch events) for input.
 * Every operation is carried out through the injected `BrowserBackend`
 * client (`evaluate`, `click`, `press`).
 */
export class WebKitInputBackend implements InputBackend {
  readonly kind = 'webkit' as const;

  /** @param client Connected browser backend used to run scripts and key presses. */
  constructor(private client: BrowserBackend) {}

  /**
   * Tap at (`x`, `y`). A positive `duration` is converted to milliseconds
   * (`duration * 1000`) and performed as a scripted long press; otherwise the
   * tap is a plain `client.click()`.
   */
  async tap(deviceId: string, x: number, y: number, duration?: number): Promise<void> {
    await timedInput(this.kind, 'tap', deviceId, async () => {
      if (duration && duration > 0) {
        // Long press via touch events with delay
        await this.client.evaluate(
          buildLongPressScript({ x, y, durationMs: duration * 1000 }),
        );
      } else {
        // Normal tap — delegate to BrowserBackend.click() which dispatches
        // touchstart → touchend → click with emulateUserGesture
        await this.client.click({ x, y });
      }
    });
  }

  /**
   * Swipe from the start point to the end point in 20 steps spread over
   * `duration` (default 0.5, multiplied by 1000 to get milliseconds). The page
   * is also scrolled by the start-minus-end delta.
   */
  async swipe(
    deviceId: string,
    startX: number, startY: number,
    endX: number, endY: number,
    duration?: number,
  ): Promise<void> {
    await timedInput(this.kind, 'swipe', deviceId, async () => {
      const scrollX = startX - endX;
      const scrollY = startY - endY;
      const steps = 20;
      const stepDelay = ((duration ?? 0.5) * 1000) / steps;

      // Two-pronged: window.scrollBy for native scroll + touch events for JS handlers
      await this.client.evaluate(
        buildSwipeScript({
          startX, startY, endX, endY, steps, stepDelayMs: stepDelay,
          scroll: { scrollX, scrollY },
        }),
      );
    });
  }

  /**
   * Append `text` to the `value` of the page's active element and dispatch
   * bubbling `input` and `change` events. Does nothing when no element (or
   * only `document.body`) is focused. `_delayMs` is accepted but ignored.
   */
  async typeText(deviceId: string, text: string, _delayMs?: number): Promise<void> {
    await timedInput(this.kind, 'typeText', deviceId, async () => {
      // JSON.stringify yields a quoted, escaped literal that is safe to splice
      // into the script source.
      const escaped = JSON.stringify(text);
      await this.client.evaluate(`
        (function() {
          var el = document.activeElement;
          if (!el || el === document.body) return;
          var p = Object.getPrototypeOf(el);
          while (p && !Object.getOwnPropertyDescriptor(p, 'value')) {
            p = Object.getPrototypeOf(p);
          }
          var desc = p ? Object.getOwnPropertyDescriptor(p, 'value') : null;
          var cur = (desc && desc.get) ? desc.get.call(el) : (el.value || '');
          if (desc && desc.set) {
            desc.set.call(el, cur + ${escaped});
          } else if ('value' in el) {
            el.value = cur + ${escaped};
          }
          el.dispatchEvent(new Event('input', { bubbles: true }));
          el.dispatchEvent(new Event('change', { bubbles: true }));
        })()
      `);
    });
  }

  /**
   * Press the key for HID code `keyCode`, translated via `HID_TO_WEBKIT_KEY`.
   *
   * @throws Error when `keyCode` is not a key of `HID_TO_WEBKIT_KEY`.
   */
  async keypress(deviceId: string, keyCode: string): Promise<void> {
    await timedInput(this.kind, 'keypress', deviceId, async () => {
      // Own-property lookup: a bare index would resolve inherited names such
      // as "constructor" to Object.prototype members and pass them to press().
      const keyName = Object.prototype.hasOwnProperty.call(HID_TO_WEBKIT_KEY, keyCode)
        ? HID_TO_WEBKIT_KEY[keyCode]
        : undefined;
      if (!keyName) {
        throw new Error(
          `Unknown HID key code "${keyCode}" for WebKit backend. ` +
            `Supported: ${Object.keys(HID_TO_WEBKIT_KEY).join(', ')}`,
        );
      }
      await this.client.press(keyName);
    });
  }

  /**
   * Press the key named `keyName`, translated via `SENDKEY_TO_WEBKIT_KEY`;
   * names without a mapping are passed to `press()` unchanged.
   */
  async sendKey(deviceId: string, keyName: string): Promise<void> {
    await timedInput(this.kind, 'sendKey', deviceId, async () => {
      // Own-property lookup so inherited names fall through as plain strings.
      const mapped = Object.prototype.hasOwnProperty.call(SENDKEY_TO_WEBKIT_KEY, keyName)
        ? SENDKEY_TO_WEBKIT_KEY[keyName]
        : keyName;
      await this.client.press(mapped);
    });
  }

  /**
   * Batching is not supported on WebKitInputBackend. JS injection executes
   * in-process over an already-established WebSocket — there is no process
   * spawn overhead to reduce. Each `tap()` call is already near-zero-cost
   * from a spawn perspective.
   */
  supportsBatching(): boolean {
    return false;
  }
}
*/ diff --git a/src/metrics/input-telemetry.ts b/src/metrics/input-telemetry.ts index 4085fd1d..ee12e71d 100644 --- a/src/metrics/input-telemetry.ts +++ b/src/metrics/input-telemetry.ts @@ -12,7 +12,7 @@ */ import { AsyncLocalStorage } from 'node:async_hooks'; -import type { InputBackendKind } from '../tools/native-input-backend'; +import type { InputBackendKind } from '../input/backend'; import { accumulateInputTelemetry } from './input-telemetry-rollup'; import { recordMemorySample, diff --git a/src/tools/flutter-vm-input-backend.ts b/src/tools/flutter-vm-input-backend.ts index aef2a6b4..e0d4f994 100644 --- a/src/tools/flutter-vm-input-backend.ts +++ b/src/tools/flutter-vm-input-backend.ts @@ -1,532 +1,17 @@ /** - * FlutterVMInputBackend — Tier-0 headless input backend for Flutter apps. + * Compatibility re-export shim for `src/input/flutter-vm-backend`. * - * Dispatches pointer/keyboard/text events directly to the Flutter engine via - * the Dart VM Service (`FlutterVMClient.evaluate`). Because the events are - * synthesised inside the running Dart isolate and fed straight into - * `PlatformDispatcher.onPointerDataPacket`, this backend: + * The implementation was moved to `src/input/flutter-vm-backend.ts` as part + * of the #707 (b) consolidation. This file re-exports every previously-public + * symbol so existing callers (tests, tools) continue to work without + * modification. * - * - **Does not move the physical mouse cursor** (no CGEvent) - * - **Does not bring Simulator.app to the foreground** (no AppleScript - * activation) - * - **Requires no opt-in env var** — it is truly headless - * - * Compared to the three existing tiers (simctl → webkit → applescript), this - * path is picked first whenever the target device is running a Flutter app in - * debug/profile mode and the VM Service URL can be discovered. Native UIKit - * apps continue to flow through the existing tiers unchanged. 
- * - * Coordinate system: iOS AX frames are expressed in logical points (the same - * units Flutter calls "logical pixels"). The Dart payload multiplies by the - * implicit view's `devicePixelRatio` to land on the engine's `physicalX/Y` - * expectations. - * - * See issue #481 for the motivation and rollout checklist. - */ - -import type { FlutterVMClient } from '../flutter'; -import { FlutterVMError } from '../flutter'; -import type { InputBackend, InputBackendKind } from './native-input-backend'; -import { timedInput } from '../metrics/input-telemetry'; - -/** - * Structured error surfaced by FlutterVMInputBackend when the underlying VM - * Service call fails (connection drop, Dart exception, timeout, etc). Carries - * the originating op so observability layers can attribute the failure. - */ -/** - * Structured error codes attached to `FlutterVMInputBackendError`. - * - * Today the code table is deliberately small — callers branch only on - * `VM_NO_EVALUATE` (release-build / no-DDS fallback signal) vs "anything - * else" which is surfaced to the user as a concrete failure. Expand this - * as we learn which failure modes the callers actually need to - * discriminate. - */ -export type FlutterVMInputBackendErrorCode = - /** `evaluate` rejected with code 113 — VM cannot compile expressions. */ - | 'VM_NO_EVALUATE' - /** Dart code ran but raised / returned an @Error. */ - | 'DART_ERROR' - /** Any other cause (connection drop, timeout, unknown). */ - | 'UNKNOWN'; - -export class FlutterVMInputBackendError extends Error { - readonly name = 'FlutterVMInputBackendError' as const; - readonly op: 'tap' | 'swipe' | 'typeText' | 'keypress' | 'sendKey'; - readonly cause: unknown; - readonly code: FlutterVMInputBackendErrorCode; - - constructor( - op: FlutterVMInputBackendError['op'], - cause: unknown, - ) { - const msg = cause instanceof Error ? 
cause.message : String(cause); - const code: FlutterVMInputBackendErrorCode = classifyFlutterVMCause(cause); - // Release / no-DDS builds emit a verbose JSON-RPC string — replace it - // with a short, actionable diagnostic so tool consumers see a single - // clean message instead of the compiler's internal error payload. - const userMessage = - code === 'VM_NO_EVALUATE' - ? `FlutterVMInputBackend.${op} failed: VM cannot compile expressions ` + - `(code 113). This app is likely a release build or was launched ` + - `with \`simctl launch\` instead of \`flutter run\`. ` + - `Use Tier 1.5 AX press (app_tap_element / app_type_element) or ` + - `relaunch under \`flutter run --debug\` for full gesture coverage. ` + - `See docs/ci-recipes.md#qa-ready-flutter-build for the ` + - `simulator (--debug) and physical-device (--profile) recipes ` + - `that keep Tier 0 available.` - : `FlutterVMInputBackend.${op} failed: ${msg}`; - super(userMessage); - this.op = op; - this.cause = cause; - this.code = code; - Object.setPrototypeOf(this, FlutterVMInputBackendError.prototype); - } -} - -function classifyFlutterVMCause(cause: unknown): FlutterVMInputBackendErrorCode { - const msg = cause instanceof Error ? cause.message : String(cause); - if (/\(code:\s*113\)/.test(msg)) return 'VM_NO_EVALUATE'; - // The inner `evalOrThrow` wraps Dart-side errors with code `DART_ERROR` - // via `FlutterVMError`; preserve that classification when bubbling up. - if ( - cause && - typeof cause === 'object' && - 'code' in cause && - (cause as { code?: string }).code === 'DART_ERROR' - ) { - return 'DART_ERROR'; - } - return 'UNKNOWN'; -} - -/** - * HID key-code → Dart `LogicalKeyboardKey` identifier. The keyIds match the - * values exposed by `package:flutter/services.dart` so the Dart payload can - * materialise a `KeyDownEvent` / `KeyUpEvent` pair. 
- */ -const HID_TO_LOGICAL_KEY: Record = { - '40': { keyId: 'LogicalKeyboardKey.enter', keyLabel: 'Enter', physicalKey: 'PhysicalKeyboardKey.enter' }, - '41': { keyId: 'LogicalKeyboardKey.escape', keyLabel: 'Escape', physicalKey: 'PhysicalKeyboardKey.escape' }, - '42': { keyId: 'LogicalKeyboardKey.backspace', keyLabel: 'Backspace', physicalKey: 'PhysicalKeyboardKey.backspace' }, - '43': { keyId: 'LogicalKeyboardKey.tab', keyLabel: 'Tab', physicalKey: 'PhysicalKeyboardKey.tab' }, - '44': { keyId: 'LogicalKeyboardKey.space', keyLabel: ' ', physicalKey: 'PhysicalKeyboardKey.space' }, - '74': { keyId: 'LogicalKeyboardKey.home', keyLabel: 'Home', physicalKey: 'PhysicalKeyboardKey.home' }, - '79': { keyId: 'LogicalKeyboardKey.arrowRight', keyLabel: 'ArrowRight', physicalKey: 'PhysicalKeyboardKey.arrowRight' }, - '80': { keyId: 'LogicalKeyboardKey.arrowLeft', keyLabel: 'ArrowLeft', physicalKey: 'PhysicalKeyboardKey.arrowLeft' }, - '81': { keyId: 'LogicalKeyboardKey.arrowDown', keyLabel: 'ArrowDown', physicalKey: 'PhysicalKeyboardKey.arrowDown' }, - '82': { keyId: 'LogicalKeyboardKey.arrowUp', keyLabel: 'ArrowUp', physicalKey: 'PhysicalKeyboardKey.arrowUp' }, -}; - -const SENDKEY_TO_LOGICAL_KEY: Record = { - Return: { keyId: 'LogicalKeyboardKey.enter', keyLabel: 'Enter', physicalKey: 'PhysicalKeyboardKey.enter' }, - Enter: { keyId: 'LogicalKeyboardKey.enter', keyLabel: 'Enter', physicalKey: 'PhysicalKeyboardKey.enter' }, - Escape: { keyId: 'LogicalKeyboardKey.escape', keyLabel: 'Escape', physicalKey: 'PhysicalKeyboardKey.escape' }, - Tab: { keyId: 'LogicalKeyboardKey.tab', keyLabel: 'Tab', physicalKey: 'PhysicalKeyboardKey.tab' }, - Space: { keyId: 'LogicalKeyboardKey.space', keyLabel: ' ', physicalKey: 'PhysicalKeyboardKey.space' }, - Delete: { keyId: 'LogicalKeyboardKey.backspace', keyLabel: 'Backspace', physicalKey: 'PhysicalKeyboardKey.backspace' }, - Backspace: { keyId: 'LogicalKeyboardKey.backspace', keyLabel: 'Backspace', physicalKey: 'PhysicalKeyboardKey.backspace' 
}, - Home: { keyId: 'LogicalKeyboardKey.home', keyLabel: 'Home', physicalKey: 'PhysicalKeyboardKey.home' }, - ArrowRight: { keyId: 'LogicalKeyboardKey.arrowRight', keyLabel: 'ArrowRight', physicalKey: 'PhysicalKeyboardKey.arrowRight' }, - ArrowLeft: { keyId: 'LogicalKeyboardKey.arrowLeft', keyLabel: 'ArrowLeft', physicalKey: 'PhysicalKeyboardKey.arrowLeft' }, - ArrowDown: { keyId: 'LogicalKeyboardKey.arrowDown', keyLabel: 'ArrowDown', physicalKey: 'PhysicalKeyboardKey.arrowDown' }, - ArrowUp: { keyId: 'LogicalKeyboardKey.arrowUp', keyLabel: 'ArrowUp', physicalKey: 'PhysicalKeyboardKey.arrowUp' }, -}; - -/** - * Format a finite number for interpolation into a Dart literal. Reject NaN / - * ±Infinity so the VM Service never receives a syntactically invalid - * expression (e.g. `Offset(NaN, NaN)` would confuse the analyser). - */ -function dartNum(value: number, label: string): string { - if (!Number.isFinite(value)) { - throw new Error(`Invalid ${label}: ${value} (must be finite)`); - } - // toString() preserves sufficient precision for pixel coordinates. - return value.toString(); -} - -/** - * Escape a JS string for safe embedding inside a Dart single-quoted literal. - * Dart string escape rules are similar to JS but the safest approach is to - * emit a Dart list-of-codeUnits from JSON.stringify, avoiding any ambiguity - * over dollar interpolation, backslashes, quotes, or non-ASCII. - */ -function dartStringLiteral(value: string): string { - // Encode via a Dart `String.fromCharCodes` so we never have to worry about - // dollar-sign interpolation, adjacent quotes, or embedded newlines. - const codeUnits: number[] = []; - for (let i = 0; i < value.length; i++) { - codeUnits.push(value.charCodeAt(i)); - } - return `String.fromCharCodes(const [${codeUnits.join(',')}])`; -} - -/** - * FlutterVMInputBackend — implements the InputBackend contract by evaluating - * Dart expressions inside the target app's main isolate. 
+ * New consumers should import directly from `../input/flutter-vm-backend`. */ -export class FlutterVMInputBackend implements InputBackend { - readonly kind: InputBackendKind = 'flutter-vm'; - private libIdCache: Map = new Map(); - - constructor(private vmClient: FlutterVMClient) {} - - /** - * Resolve a Flutter library URI to its VM Service library id, caching the - * result per URI. Each operation targets a different library so that the - * required symbols are in lexical scope: - * - pointer dispatch → `mouse_tracker.dart` (bare `import 'dart:ui'`) - * - text input → `editable_text.dart` - * - key events → `hardware_keyboard.dart` - * - * Same pattern used by `FlutterVMClient.selectWidgetAtPoint` for the - * inspector library (see `vm-service-client.ts:386-411`). - */ - private async resolveLibId(uri: string): Promise { - const cached = this.libIdCache.get(uri); - if (cached) return cached; - - const isolateId = this.vmClient.getState()?.mainIsolateId; - if (!isolateId) { - throw new FlutterVMError('No main isolate', 'NO_ISOLATE'); - } - const isolate = await this.vmClient.callMethod('getIsolate', { isolateId }); - const libs = - (isolate as { libraries?: Array<{ uri?: string; id?: string }> }).libraries ?? []; - const lib = libs.find((l) => l.uri === uri); - if (!lib?.id) { - throw new FlutterVMError( - `${uri} library not loaded in isolate — is this a Flutter app?`, - 'NO_BINDING_LIB', - ); - } - this.libIdCache.set(uri, lib.id); - return lib.id; - } - - /** - * Synthesise a pointer down → up sequence. When `duration` (in seconds) is - * positive the up event is timestamped `duration * 1000` ms after the down - * event so Flutter's gesture arena treats it as a long-press rather than a - * tap. The Dart payload ends with - * `SchedulerBinding.instance.scheduleFrame()` to ensure the engine pumps - * the event queue even in a quiescent state. 
- */ - async tap( - deviceId: string, - x: number, - y: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'tap', deviceId, () => this.tapInternal(x, y, duration)); - } - - private async tapInternal( - x: number, - y: number, - duration?: number, - ): Promise { - let xStr: string; - let yStr: string; - try { - xStr = dartNum(x, 'x'); - yStr = dartNum(y, 'y'); - } catch (err) { - throw new FlutterVMInputBackendError('tap', err); - } - const durMs = duration && duration > 0 ? Math.round(duration * 1000) : 0; - - const expression = - '(() {' + - ' final dispatcher = PlatformDispatcher.instance;' + - ' final view = dispatcher.implicitView;' + - ' if (view == null) { return false; }' + - ' final dpr = view.devicePixelRatio;' + - ` final double px = ${xStr} * dpr;` + - ` final double py = ${yStr} * dpr;` + - ` final int downUs = 0;` + - ` final int upUs = ${durMs} * 1000;` + - ' void dispatch(int tUs, PointerChange change) {' + - ' final packet = PointerDataPacket(data: [' + - ' PointerData(' + - ' timeStamp: Duration(microseconds: tUs),' + - ' change: change,' + - ' kind: PointerDeviceKind.touch,' + - ' device: 1,' + - ' pointerIdentifier: 1,' + - ' physicalX: px,' + - ' physicalY: py,' + - ' buttons: change == PointerChange.up ? 0 : 1,' + - ' pressure: change == PointerChange.up ? 0.0 : 1.0,' + - ' pressureMax: 1.0,' + - ' ),' + - ' ]);' + - ' dispatcher.onPointerDataPacket?.call(packet);' + - ' }' + - ' dispatch(downUs, PointerChange.add);' + - ' dispatch(downUs, PointerChange.down);' + - ' dispatch(upUs, PointerChange.up);' + - ' dispatch(upUs, PointerChange.remove);' + - ' return true;' + - '})()'; - - await this.evalOrThrow('tap', expression, 'package:flutter/src/rendering/mouse_tracker.dart'); - } - - /** - * Synthesise a drag gesture as a down → N×move → up sequence. `duration` - * (seconds) spreads the move events evenly across the requested window so - * the gesture arena classifies it as a swipe rather than a flick or tap. 
- */ - async swipe( - deviceId: string, - startX: number, - startY: number, - endX: number, - endY: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'swipe', deviceId, () => - this.swipeInternal(startX, startY, endX, endY, duration), - ); - } - - private async swipeInternal( - startX: number, - startY: number, - endX: number, - endY: number, - duration?: number, - ): Promise { - const sxStr = dartNum(startX, 'startX'); - const syStr = dartNum(startY, 'startY'); - const exStr = dartNum(endX, 'endX'); - const eyStr = dartNum(endY, 'endY'); - const totalMs = Math.max(1, Math.round((duration ?? 0.5) * 1000)); - const steps = 20; - const stepUs = Math.round((totalMs * 1000) / steps); - - const expression = - '(() {' + - ' final dispatcher = PlatformDispatcher.instance;' + - ' final view = dispatcher.implicitView;' + - ' if (view == null) { return false; }' + - ' final dpr = view.devicePixelRatio;' + - ` final double sx = ${sxStr} * dpr;` + - ` final double sy = ${syStr} * dpr;` + - ` final double ex = ${exStr} * dpr;` + - ` final double ey = ${eyStr} * dpr;` + - ` const int steps = ${steps};` + - ` const int stepUs = ${stepUs};` + - ' void post(int tUs, PointerChange change, double x, double y) {' + - ' final packet = PointerDataPacket(data: [' + - ' PointerData(' + - ' timeStamp: Duration(microseconds: tUs),' + - ' change: change,' + - ' kind: PointerDeviceKind.touch,' + - ' device: 1,' + - ' pointerIdentifier: 1,' + - ' physicalX: x,' + - ' physicalY: y,' + - ' buttons: change == PointerChange.up ? 0 : 1,' + - ' pressure: change == PointerChange.up ? 
0.0 : 1.0,' + - ' pressureMax: 1.0,' + - ' ),' + - ' ]);' + - ' dispatcher.onPointerDataPacket?.call(packet);' + - ' }' + - ' post(0, PointerChange.add, sx, sy);' + - ' post(0, PointerChange.down, sx, sy);' + - ' for (int i = 1; i <= steps; i++) {' + - ' final double t = i / steps;' + - ' final double x = sx + (ex - sx) * t;' + - ' final double y = sy + (ey - sy) * t;' + - ' post(stepUs * i, PointerChange.move, x, y);' + - ' }' + - ' final int endUs = stepUs * steps;' + - ' post(endUs, PointerChange.up, ex, ey);' + - ' post(endUs, PointerChange.remove, ex, ey);' + - ' return true;' + - '})()'; - - await this.evalOrThrow('swipe', expression, 'package:flutter/src/rendering/mouse_tracker.dart'); - } - - /** - * Inject text into the currently-focused `EditableText` via Flutter's - * `TextInput` channel. This mirrors what the iOS IME would send when the - * user types on the system keyboard, so controllers and `onChanged` - * callbacks fire naturally. Falls through silently (no-op) if nothing is - * focused — same behaviour as WebKitInputBackend. - */ - async typeText(deviceId: string, text: string): Promise { - await timedInput(this.kind, 'typeText', deviceId, () => this.typeTextInternal(text)); - } - - private async typeTextInternal(text: string): Promise { - const textLit = dartStringLiteral(text); - - // Read the live TextInputConnection client id so the platform message - // targets the correct connection. The Flutter framework drops messages - // where args[0] != _currentConnection._id, so hardcoding -1 would be - // a silent no-op. We read the id via TextInput._currentConnection._id, - // which is private but accessible via evaluate on the binding library. - const expression = - '(() async {' + - ` final String newText = ${textLit};` + - ' // Read the current TextInputConnection client id.' + - ' // If nothing is focused, fall back to -1 (message is dropped, same' + - ' // as typing on a hardware keyboard with no focused field).' 
+ - ' int clientId = -1;' + - ' try {' + - ' // Fallback: just check if the primary focus accepts text.' + - ' final focused = FocusManager.instance.primaryFocus;' + - ' if (focused != null && focused.context != null) {' + - ' final editable = focused.context!.findAncestorStateOfType();' + - ' if (editable != null) {' + - ' // Force update through the editable directly — this is' + - ' // the most reliable path since it bypasses the private _id.' + - ' final ctrl = editable.textEditingValue;' + - ' editable.userUpdateTextEditingValue(' + - ' TextEditingValue(' + - ' text: ctrl.text + newText,' + - ' selection: TextSelection.collapsed(offset: ctrl.text.length + newText.length),' + - ' ),' + - ' SelectionChangedCause.keyboard,' + - ' );' + - ' return true;' + - ' }' + - ' }' + - ' } catch (_) {}' + - ' // Fallback: deliver via platform channel with best-effort client id.' + - ' final Map state = {' + - ' "text": newText,' + - ' "selectionBase": newText.length,' + - ' "selectionExtent": newText.length,' + - ' "selectionAffinity": "TextAffinity.downstream",' + - ' "selectionIsDirectional": false,' + - ' "composingBase": -1,' + - ' "composingExtent": -1,' + - ' };' + - ' final message = const JSONMethodCodec().encodeMethodCall(' + - ' MethodCall("TextInputClient.updateEditingState", [clientId, state]),' + - ' );' + - ' await WidgetsBinding.instance!.defaultBinaryMessenger.handlePlatformMessage(' + - ' "flutter/textinput",' + - ' message,' + - ' (dynamic _) {},' + - ' );' + - ' return true;' + - '})()'; - - await this.evalOrThrow('typeText', expression, 'package:flutter/src/widgets/editable_text.dart'); - } - - /** - * Dispatch a HID key code through `HardwareKeyboard`. Only a curated set of - * control keys is supported — matches the WebKit/AppleScript backends. 
- */ - async keypress(deviceId: string, keyCode: string): Promise { - await timedInput(this.kind, 'keypress', deviceId, () => this.keypressInternal(keyCode)); - } - - private async keypressInternal(keyCode: string): Promise { - const entry = HID_TO_LOGICAL_KEY[keyCode]; - if (!entry) { - throw new Error( - `Unknown HID key code "${keyCode}" for FlutterVM backend. ` + - `Supported: ${Object.keys(HID_TO_LOGICAL_KEY).join(', ')}`, - ); - } - await this.dispatchKey('keypress', entry.keyId, entry.keyLabel, entry.physicalKey); - } - - /** Dispatch a named key ("Return", "Escape", ...) through HardwareKeyboard. */ - async sendKey(deviceId: string, keyName: string): Promise { - await timedInput(this.kind, 'sendKey', deviceId, () => this.sendKeyInternal(keyName)); - } - - private async sendKeyInternal(keyName: string): Promise { - const entry = SENDKEY_TO_LOGICAL_KEY[keyName]; - if (!entry) { - throw new Error( - `Unknown key name "${keyName}" for FlutterVM backend. ` + - `Supported: ${Object.keys(SENDKEY_TO_LOGICAL_KEY).join(', ')}`, - ); - } - await this.dispatchKey('sendKey', entry.keyId, entry.keyLabel, entry.physicalKey); - } - - // ── internals ────────────────────────────────────────────────────────── - - private async dispatchKey( - op: 'keypress' | 'sendKey', - logicalKeyExpr: string, - keyLabel: string, - physicalKeyExpr: string, - ): Promise { - const labelLit = dartStringLiteral(keyLabel); - // Emit a KeyDown event then a KeyUp through HardwareKeyboard so downstream - // focus nodes observe a complete press. `timeStamp` uses the default - // (zero) — the event queue does not require strict monotonicity. - const expression = - '(() {' + - ` final label = ${labelLit};` + - ` final logical = ${logicalKeyExpr};` + - ` final physical = ${physicalKeyExpr};` + - ' final down = KeyDownEvent(' + - ' physicalKey: physical,' + - ' logicalKey: logical,' + - ' timeStamp: Duration.zero,' + - ' character: label.length == 1 ? 
label : null,' + - ' );' + - ' final up = KeyUpEvent(' + - ' physicalKey: physical,' + - ' logicalKey: logical,' + - ' timeStamp: Duration.zero,' + - ' );' + - ' HardwareKeyboard.instance.handleKeyEvent(down);' + - ' HardwareKeyboard.instance.handleKeyEvent(up);' + - ' return true;' + - '})()'; - - await this.evalOrThrow(op, expression, 'package:flutter/src/services/hardware_keyboard.dart'); - } - private async evalOrThrow( - op: FlutterVMInputBackendError['op'], - expression: string, - libraryUri: string, - ): Promise { - try { - // Scope the evaluate to the per-operation Flutter library so all - // required symbols are in lexical scope. - const targetId = await this.resolveLibId(libraryUri); - const result = await this.vmClient.evaluate(expression, { targetId }); - // VM returns an @Error shape instead of throwing when the expression - // itself compiled but raised a Dart exception. Surface that as a - // structured InputBackendError. - const errType = (result as { type?: string }).type; - if (errType === '@Error' || errType === 'Error') { - const message = - (result as { message?: string }).message ?? JSON.stringify(result); - throw new FlutterVMError(message, 'DART_ERROR'); - } - } catch (err) { - if (err instanceof FlutterVMInputBackendError) throw err; - throw new FlutterVMInputBackendError(op, err); - } - } +export { + FlutterVMInputBackendError, + FlutterVMInputBackend, +} from '../input/flutter-vm-backend'; - /** - * Batching is not supported on FlutterVMInputBackend. Events are injected - * via `evaluate` over an already-open WebSocket — there is no process spawn - * overhead to reduce. Callers should use `tap()` in a loop for repeated - * events. 
- */ - supportsBatching(): boolean { - return false; - } -} +export type { FlutterVMInputBackendErrorCode } from '../input/flutter-vm-backend'; diff --git a/src/tools/native-input-backend.ts b/src/tools/native-input-backend.ts index 12e66191..0ea822c6 100644 --- a/src/tools/native-input-backend.ts +++ b/src/tools/native-input-backend.ts @@ -1,888 +1,82 @@ /** - * NativeInputBackend — Abstraction layer for sending input events to iOS Simulator. + * NativeInputBackend — compatibility shim. * - * Provides three backends (selected via auto-detection): - * 1. SimctlInputBackend — `xcrun simctl io input` (Xcode 15–16) - * 2. WebKitInputBackend — JavaScript touch events via WebKit protocol (Xcode 26+, Safari) - * 3. AppleScriptInputBackend — osascript + CGEvent (opt-in only, requires window focus) + * All concrete backends, the resolver, and the error class have been split into + * focused modules under `src/input/` as part of the #707 (a) refactor. + * This file re-exports every previously-public symbol so existing callers + * (`src/tools/native-input-utils.ts`, `src/metrics/cache-budget.ts`, etc.) + * continue to work without modification. * - * On Xcode 26+ where `simctl io input` was removed, the WebKit backend provides - * focus-free touch injection for Safari web content. + * Behavior is unchanged: no fallback semantics were altered; no public tool + * API was modified. * - * The AppleScript backend is **default-deny**: it is only instantiated when the - * caller explicitly opts in via the `OPENSAFARI_ALLOW_FOCUS_INPUT=1` environment - * variable. Without the opt-in, `getInputBackend()` throws - * `HeadlessInputUnavailableError` with actionable remediation guidance. This - * prevents the surprising focus-theft / mouse-movement behavior that motivated - * issues #403 and #405. 
+ * New consumers should import directly from: + * - `../input/backend` — InputBackend interface + InputBackendKind + * - `../input/simctl-backend` — SimctlInputBackend + * - `../input/applescript-backend`— AppleScriptInputBackend + key maps + * - `../input/webkit-backend` — WebKitInputBackend + key maps + * - `../input/flutter-resolver` — FlutterVMResolver, FlutterVMResolverInstance + * - `../input/backend-resolver` — InputBackendResolver, HeadlessInputUnavailableError */ -import { execFile } from 'child_process'; -import { promisify } from 'util'; -import { SimctlExecutor } from '../simulator/simctl'; -import type { BrowserBackend } from '../types/browser-backend'; -import type { FlutterVMClient } from '../flutter'; -import { getFlutterVMClient, removeFlutterVMClient } from '../flutter'; -import { FlutterVMInputBackend } from './flutter-vm-input-backend'; -import { tryCreateSimulatorKitHIDBackend } from './sim-hid-input-backend'; -import { - isPointerServiceEnabled, - tryCreatePointerServiceBackend, -} from './pointer-service-input-backend'; -import { timedInput } from '../metrics/input-telemetry'; -import { - buildLongPressScript, - buildSwipeScript, -} from '../webkit/dom-input-scripts'; - -const execFileAsync = promisify(execFile); - -function delay(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} - -// ── Interface ──────────────────────────────────────────────────────────────── - -/** - * Stable identifier for each concrete input backend. Included in tool call - * results so MCP clients and users can audit which path dispatched their - * input — useful when diagnosing focus-theft reports or confirming that a - * call stayed on a headless tier. - */ -export type InputBackendKind = - | 'flutter-vm' - | 'simctl' - | 'webkit' - | 'applescript' - | 'simhid' - | 'ax-press' - | 'pointer-service'; - -/** - * A single tap event used in batch dispatch. 
Mirrors the signature of - * `InputBackend.tap` but excludes the `deviceId` (supplied once at the - * batch-call level) to avoid repetition in large queues. - */ -export interface BatchTapEvent { - x: number; - y: number; - /** Optional long-press duration in seconds. */ - duration?: number; -} - -export interface InputBackend { - /** Stable identifier used for observability / audit logging. */ - readonly kind: InputBackendKind; - - tap(deviceId: string, x: number, y: number, duration?: number): Promise; - swipe( - deviceId: string, - startX: number, - startY: number, - endX: number, - endY: number, - duration?: number, - ): Promise; - /** - * Type `text` into whatever is currently focused on `deviceId`. - * - * `delayMs` is an optional inter-character pause between consecutive key - * sends, in milliseconds. Only the simhid backend honours it (other - * backends bypass the software keyboard and have no equivalent failure - * mode); they may safely ignore the argument. Required for segmented - * OTP-style fields that drop characters when keys arrive too fast (issue - * #639 Problem 2). Default 0 (no pause). - */ - typeText(deviceId: string, text: string, delayMs?: number): Promise; - keypress(deviceId: string, keyCode: string): Promise; - sendKey(deviceId: string, keyName: string): Promise; - - /** - * Whether this backend supports the `tapBatch()` method for submitting - * multiple tap events in a single logical call. Callers MUST check this - * before calling `tapBatch()` — the method is absent on backends that - * return `false`. 
- * - * **Unsupported combinations**: `tapBatch` is intentionally NOT available - * on `SimctlInputBackend` (each simctl invocation opens a separate Xcode - * process, so batching at the TS level provides no meaningful reduction), - * `WebKitInputBackend` (JS injection is already in-process with no spawn - * cost), `FlutterVMInputBackend` (same — evaluate over a WebSocket), - * `AppleScriptInputBackend` (opt-in focus-stealing path; batching would - * hide per-tap activation overhead rather than remove it), and - * `PointerServiceInputBackend` (tap-ps subcommand is experimental; - * batching is deferred until Phase 2 of #590). - */ - supportsBatching(): boolean; - - /** - * Submit multiple tap events to `deviceId` sequentially, reducing the - * per-call overhead that a caller would otherwise pay by invoking - * `tap()` in a loop. - * - * Only available when `supportsBatching()` returns `true`. Callers must - * guard with `supportsBatching()` before calling this method; calling - * it on a backend that does not advertise batching support is a - * programming error and will throw. - * - * The events are dispatched in order. If any event fails, the batch - * stops and rejects with that error — already-dispatched events are - * NOT rolled back (HID injection is fire-and-forget at the OS level). - */ - tapBatch?(deviceId: string, events: BatchTapEvent[]): Promise; -} - -// ── SimctlInputBackend ─────────────────────────────────────────────────────── - -/** - * Uses `xcrun simctl io input` subcommands. - * Available on Xcode versions that ship the `input` subcommand (typically ≤ 16). - */ -export class SimctlInputBackend implements InputBackend { - readonly kind = 'simctl' as const; - private simctl: SimctlExecutor; - - constructor(simctl?: SimctlExecutor) { - this.simctl = simctl ?? 
new SimctlExecutor(); - } - - async tap(deviceId: string, x: number, y: number, duration?: number): Promise { - await timedInput(this.kind, 'tap', deviceId, async () => { - if (duration && duration > 0) { - await this.simctl.exec([ - 'io', deviceId, 'input', 'press', - String(x), String(y), String(duration), - ]); - } else { - await this.simctl.exec(['io', deviceId, 'input', 'tap', String(x), String(y)]); - } - }); - } - - async swipe( - deviceId: string, - startX: number, startY: number, - endX: number, endY: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'swipe', deviceId, async () => { - try { - await this.simctl.exec([ - 'io', deviceId, 'input', 'swipe', - String(startX), String(startY), String(endX), String(endY), - ]); - } catch { - // Fallback: `drag` accepts a duration argument - await this.simctl.exec([ - 'io', deviceId, 'input', 'drag', - String(startX), String(startY), String(endX), String(endY), - String(duration ?? 0.5), - ]); - } - }); - } - - async typeText(deviceId: string, text: string): Promise { - await timedInput(this.kind, 'typeText', deviceId, async () => { - await this.simctl.exec(['io', deviceId, 'input', 'text', text]); - }); - } - - async keypress(deviceId: string, keyCode: string): Promise { - await timedInput(this.kind, 'keypress', deviceId, async () => { - await this.simctl.exec(['io', deviceId, 'input', 'keypress', keyCode]); - }); - } - - async sendKey(deviceId: string, keyName: string): Promise { - await timedInput(this.kind, 'sendKey', deviceId, async () => { - await this.simctl.exec(['io', deviceId, 'sendkey', keyName]); - }); - } - - /** - * Batching is not supported on SimctlInputBackend. Each `xcrun simctl io - * input` invocation opens a separate Xcode IPC channel; accumulating calls - * at the TypeScript level would not reduce that per-call overhead. 
- */ - supportsBatching(): boolean { - return false; - } -} - -// ── AppleScriptInputBackend ────────────────────────────────────────────────── - -/** - * AppleScript key-code mapping (macOS virtual key codes). - * Used to translate HID key codes and key names to AppleScript `key code` values. - */ -const HID_TO_APPLESCRIPT: Record = { - '40': 36, // Return / Enter - '41': 53, // Escape - '42': 51, // Backspace / Delete - '43': 48, // Tab - '44': 49, // Space - '74': 115, // Home - '79': 124, // Right arrow - '80': 123, // Left arrow - '81': 125, // Down arrow - '82': 126, // Up arrow -}; - -const SENDKEY_TO_APPLESCRIPT: Record = { - Return: 36, - Escape: 53, - Tab: 48, - Space: 49, - Delete: 51, - Home: 115, -}; - -/** - * Uses AppleScript (`osascript`) and Swift CGEvent for input. - * Works on any Xcode version as it bypasses `simctl io input` entirely. - * - * Requires: - * - Accessibility permissions for System Events - * - Simulator app running and visible - * - * Coordinate translation assumes Simulator is at default "Point Accurate" (1:1) zoom. - */ -export class AppleScriptInputBackend implements InputBackend { - readonly kind = 'applescript' as const; - - /** - * Per-device cache for the resolved content origin. - * Key: deviceId, Value: { x, y, winX, winY } where winX/winY is the window - * top-left at the time of the last measurement (used to detect window moves). - */ - private originCache = new Map(); - - /** Set of deviceIds that have already emitted the AX fallback warning. */ - private warnedDevices = new Set(); - - /** - * Timestamp of the last successful Simulator activation (ms since epoch). - * Retained for observability and potential future diagnostics; no longer - * used to gate the frontmost-app check — every `activateSimulator()` call - * queries System Events to confirm current focus state before deciding - * whether to activate. - * - * This field is scoped to the AppleScript backend instance and does NOT - * affect any headless tier. 
It is NOT shared with `getInputBackend()`. - */ - private static readonly ACTIVATION_CACHE_TTL_MS = 500; - private lastActivationAt = 0; - - private async runAppleScript(lines: string[]): Promise { - const args = lines.flatMap((line) => ['-e', line]); - const { stdout } = await execFileAsync('osascript', args, { timeout: 10_000 }); - return stdout.trim(); - } - - /** - * Activate Simulator.app via AppleScript when it is not already frontmost. - * - * On every call we query System Events for the current frontmost process - * name. If Simulator is already frontmost we skip the `activate` call and - * the 150 ms settle delay — the frontmost check is a single cheap osascript - * IPC round-trip (~5–10 ms) and is always correct regardless of how recently - * the last activation occurred. - * - * `lastActivationAt` is retained for observability / future diagnostics but - * no longer gates the frontmost check — removing the TTL early-return - * ensures input is never delivered to the wrong app when focus changes - * between consecutive calls in a burst. - * - * This optimisation applies ONLY to the opt-in focus-stealing path — - * all headless backends skip this method entirely. - */ - private async activateSimulator(): Promise { - // Always check frontmost state; the IPC cost (~5–10 ms) is cheaper than - // the risk of delivering input to the wrong app after a focus change. - const frontApp = await this.runAppleScript([ - 'tell application "System Events" to set frontApp to name of first application process whose frontmost is true', - 'return frontApp', - ]); - if (frontApp !== 'Simulator') { - await this.runAppleScript(['tell application "Simulator" to activate']); - await delay(150); - } - this.lastActivationAt = Date.now(); - } - - /** - * Get the Simulator window's content-area origin in macOS screen coordinates - * by querying the position of the first child UI element (the iOS device - * content area within the macOS window). 
This avoids hardcoding any title-bar - * height offset and handles Xcode 26 where the AX bridge already returns - * frames in window-relative coordinates. - * - * On any AppleScript failure, falls back to the raw window position (offset 0) - * and emits one `console.error` warning per device. The result is cached per - * deviceId; pass `{ refresh: true }` to invalidate the cache. - */ - async getSimulatorContentOrigin( - deviceId: string, - options?: { refresh?: boolean }, - ): Promise<{ x: number; y: number }> { - if (!options?.refresh) { - const cached = this.originCache.get(deviceId); - if (cached) { - return { x: cached.x, y: cached.y }; - } - } - - let winX = 0; - let winY = 0; - let contentX = 0; - let contentY = 0; - - try { - const result = await this.runAppleScript([ - 'tell application "System Events"', - ' tell process "Simulator"', - ' set winPos to position of window 1', - ' set wx to item 1 of winPos', - ' set wy to item 2 of winPos', - ' set childPos to position of UI element 1 of window 1', - ' set cx to item 1 of childPos', - ' set cy to item 2 of childPos', - ' return (wx as text) & "," & (wy as text) & "|" & (cx as text) & "," & (cy as text)', - ' end tell', - 'end tell', - ]); - - const [winPart, childPart] = result.split('|'); - if (!winPart || !childPart) { - throw new Error(`Unexpected AX output: ${result}`); - } - const [px, py] = winPart.split(',').map(Number); - const [cx, cy] = childPart.split(',').map(Number); - if ([px, py, cx, cy].some((n) => !isFinite(n))) { - throw new Error(`Non-numeric values in AX output: ${result}`); - } - winX = px; - winY = py; - contentX = cx; - contentY = cy; - } catch (err) { - // Fallback: use raw window position (zero title-bar offset). - // Only warn once per device to avoid log spam. 
- if (!this.warnedDevices.has(deviceId)) { - this.warnedDevices.add(deviceId); - console.error( - `[input-backend] AppleScript AX content-origin query failed for device ${deviceId}; ` + - `falling back to window position (offset 0). ` + - `Error: ${err instanceof Error ? err.message : String(err)}`, - ); - } - - // Attempt a simpler query to get the window position for the fallback. - try { - const winResult = await this.runAppleScript([ - 'tell application "System Events"', - ' tell process "Simulator"', - ' set winPos to position of window 1', - ' set wx to item 1 of winPos', - ' set wy to item 2 of winPos', - ' return (wx as text) & "," & (wy as text)', - ' end tell', - 'end tell', - ]); - const [fx, fy] = winResult.split(',').map(Number); - if (isFinite(fx) && isFinite(fy)) { - winX = fx; - winY = fy; - } - } catch { - // If even the fallback fails, use 0,0. - } - contentX = winX; - contentY = winY; - } - - this.originCache.set(deviceId, { x: contentX, y: contentY, winX, winY }); - return { x: contentX, y: contentY }; - } - - /** - * Translate iOS point coordinates to absolute macOS screen coordinates. - * Assumes 1:1 point mapping (Simulator at default zoom). - * - * Uses the cached origin from `getSimulatorContentOrigin`. If the user - * moves the window or rotates the device, callers must explicitly invalidate - * the cache via `getSimulatorContentOrigin(deviceId, { refresh: true })`. 
- */ - private async toScreen( - deviceId: string, - x: number, - y: number, - ): Promise<{ sx: number; sy: number }> { - const origin = await this.getSimulatorContentOrigin(deviceId); - return { - sx: Math.round(origin.x + x), - sy: Math.round(origin.y + y), - }; - } - - async tap(deviceId: string, x: number, y: number, duration?: number): Promise { - await timedInput(this.kind, 'tap', deviceId, async () => { - await this.activateSimulator(); - const { sx, sy } = await this.toScreen(deviceId, x, y); - - if (duration && duration > 0) { - // Long press: mouse down → wait → mouse up via Swift CGEvent - await execFileAsync('swift', ['-e', [ - 'import Cocoa', - `let p = CGPoint(x: ${sx}, y: ${sy})`, - 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', - `Thread.sleep(forTimeInterval: ${duration})`, - 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', - ].join('\n')], { timeout: Math.max(15_000, duration * 1000 + 5000) }); - } else { - await this.runAppleScript([ - `tell application "System Events" to click at {${sx}, ${sy}}`, - ]); - } - }); - } - - async swipe( - deviceId: string, - startX: number, startY: number, - endX: number, endY: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'swipe', deviceId, async () => { - await this.activateSimulator(); - // Get origin once for both start and end coordinates - const origin = await this.getSimulatorContentOrigin(deviceId); - const sx = Math.round(origin.x + startX); - const sy = Math.round(origin.y + startY); - const ex = Math.round(origin.x + endX); - const ey = Math.round(origin.y + endY); - const dur = duration ?? 
0.5; - const steps = 20; - const stepDelay = dur / steps; - - // Mouse drag via Swift CGEvent (macOS built-in, no external deps) - await execFileAsync('swift', ['-e', [ - 'import Cocoa', - `let x1: CGFloat = ${sx}, y1: CGFloat = ${sy}`, - `let x2: CGFloat = ${ex}, y2: CGFloat = ${ey}`, - `let steps = ${steps}`, - `let stepDelay = ${stepDelay}`, - 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseDown, mouseCursorPosition: CGPoint(x: x1, y: y1), mouseButton: .left)!.post(tap: .cghidEventTap)', - 'Thread.sleep(forTimeInterval: 0.05)', - 'for i in 1...steps {', - ' let t = CGFloat(i) / CGFloat(steps)', - ' let p = CGPoint(x: x1 + (x2 - x1) * t, y: y1 + (y2 - y1) * t)', - ' CGEvent(mouseEventSource: nil, mouseType: .leftMouseDragged, mouseCursorPosition: p, mouseButton: .left)!.post(tap: .cghidEventTap)', - ' Thread.sleep(forTimeInterval: stepDelay)', - '}', - 'CGEvent(mouseEventSource: nil, mouseType: .leftMouseUp, mouseCursorPosition: CGPoint(x: x2, y: y2), mouseButton: .left)!.post(tap: .cghidEventTap)', - ].join('\n')], { timeout: 15_000 }); - }); - } - - async typeText(deviceId: string, text: string): Promise { - await timedInput(this.kind, 'typeText', deviceId, async () => { - await this.activateSimulator(); - // Escape special AppleScript characters - const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); - await this.runAppleScript([ - `tell application "System Events" to keystroke "${escaped}"`, - ]); - }); - } - - async keypress(deviceId: string, keyCode: string): Promise { - await timedInput(this.kind, 'keypress', deviceId, async () => { - await this.activateSimulator(); - const asKeyCode = HID_TO_APPLESCRIPT[keyCode]; - if (asKeyCode === undefined) { - throw new Error( - `Unknown HID key code "${keyCode}" for AppleScript backend. 
` + - `Supported: ${Object.keys(HID_TO_APPLESCRIPT).join(', ')}`, - ); - } - await this.runAppleScript([ - `tell application "System Events" to key code ${asKeyCode}`, - ]); - }); - } - - async sendKey(deviceId: string, keyName: string): Promise { - await timedInput(this.kind, 'sendKey', deviceId, async () => { - await this.activateSimulator(); - const asKeyCode = SENDKEY_TO_APPLESCRIPT[keyName]; - if (asKeyCode === undefined) { - throw new Error( - `Unknown key name "${keyName}" for AppleScript backend. ` + - `Supported: ${Object.keys(SENDKEY_TO_APPLESCRIPT).join(', ')}`, - ); - } - await this.runAppleScript([ - `tell application "System Events" to key code ${asKeyCode}`, - ]); - }); - } - - /** - * Batching is not supported on AppleScriptInputBackend. This is the - * opt-in focus-stealing path; each tap must activate Simulator.app first, - * so there is no meaningful process-spawn reduction available. Callers - * that need repeated taps via this backend must invoke `tap()` in a loop. - */ - supportsBatching(): boolean { - return false; - } -} - -// ── WebKitInputBackend ────────────────────────────────────────────────── - -/** - * HID key-code → standard key name mapping for WebKit `press()`. - */ -const HID_TO_WEBKIT_KEY: Record = { - '40': 'Enter', - '41': 'Escape', - '42': 'Backspace', - '43': 'Tab', - '44': 'Space', - '74': 'Home', - '79': 'ArrowRight', - '80': 'ArrowLeft', - '81': 'ArrowDown', - '82': 'ArrowUp', -}; - -/** - * Named key → WebKit `press()` key name mapping. - */ -const SENDKEY_TO_WEBKIT_KEY: Record = { - Return: 'Enter', - Escape: 'Escape', - Tab: 'Tab', - Space: 'Space', - Delete: 'Backspace', - Home: 'Home', -}; - -/** - * Uses WebKit Remote Debugging Protocol (JavaScript touch events) for input. - * Completely focus-free — communicates over a TCP socket, so the Simulator - * window does not need to be in the foreground. 
- * - * Limitations: - * - Only works when Safari/WebView is connected via WebKit protocol - * - Touch events dispatched via JS have `isTrusted: false`, so native - * scroll is supplemented with an explicit `window.scrollBy()` call - */ -export class WebKitInputBackend implements InputBackend { - readonly kind = 'webkit' as const; - constructor(private client: BrowserBackend) {} - - async tap(deviceId: string, x: number, y: number, duration?: number): Promise { - await timedInput(this.kind, 'tap', deviceId, async () => { - if (duration && duration > 0) { - // Long press via touch events with delay - await this.client.evaluate( - buildLongPressScript({ x, y, durationMs: duration * 1000 }), - ); - } else { - // Normal tap — delegate to BrowserBackend.click() which dispatches - // touchstart → touchend → click with emulateUserGesture - await this.client.click({ x, y }); - } - }); - } +// ── Re-exports: types & interfaces ─────────────────────────────────────────── - async swipe( - deviceId: string, - startX: number, startY: number, - endX: number, endY: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'swipe', deviceId, async () => { - const scrollX = startX - endX; - const scrollY = startY - endY; - const steps = 20; - const stepDelay = ((duration ?? 
0.5) * 1000) / steps; +export type { InputBackend, InputBackendKind, BatchTapEvent } from '../input/backend'; - // Two-pronged: window.scrollBy for native scroll + touch events for JS handlers - await this.client.evaluate( - buildSwipeScript({ - startX, startY, endX, endY, steps, stepDelayMs: stepDelay, - scroll: { scrollX, scrollY }, - }), - ); - }); - } +// ── Re-exports: concrete backends ──────────────────────────────────────────── - async typeText(deviceId: string, text: string): Promise { - await timedInput(this.kind, 'typeText', deviceId, async () => { - const escaped = JSON.stringify(text); - await this.client.evaluate(` - (function() { - var el = document.activeElement; - if (!el || el === document.body) return; - var p = Object.getPrototypeOf(el); - while (p && !Object.getOwnPropertyDescriptor(p, 'value')) { - p = Object.getPrototypeOf(p); - } - var desc = p ? Object.getOwnPropertyDescriptor(p, 'value') : null; - var cur = (desc && desc.get) ? desc.get.call(el) : (el.value || ''); - if (desc && desc.set) { - desc.set.call(el, cur + ${escaped}); - } else if ('value' in el) { - el.value = cur + ${escaped}; - } - el.dispatchEvent(new Event('input', { bubbles: true })); - el.dispatchEvent(new Event('change', { bubbles: true })); - })() - `); - }); - } +export { SimctlInputBackend } from '../input/simctl-backend'; +export { AppleScriptInputBackend } from '../input/applescript-backend'; +export { WebKitInputBackend } from '../input/webkit-backend'; - async keypress(deviceId: string, keyCode: string): Promise { - await timedInput(this.kind, 'keypress', deviceId, async () => { - const keyName = HID_TO_WEBKIT_KEY[keyCode]; - if (!keyName) { - throw new Error( - `Unknown HID key code "${keyCode}" for WebKit backend. 
` + - `Supported: ${Object.keys(HID_TO_WEBKIT_KEY).join(', ')}`, - ); - } - await this.client.press(keyName); - }); - } +// ── Re-exports: key maps ───────────────────────────────────────────────────── - async sendKey(deviceId: string, keyName: string): Promise { - await timedInput(this.kind, 'sendKey', deviceId, async () => { - const mapped = SENDKEY_TO_WEBKIT_KEY[keyName] ?? keyName; - await this.client.press(mapped); - }); - } +export { + HID_TO_APPLESCRIPT, + SENDKEY_TO_APPLESCRIPT, +} from '../input/applescript-backend'; - /** - * Batching is not supported on WebKitInputBackend. JS injection executes - * in-process over an already-established WebSocket — there is no process - * spawn overhead to reduce. Each `tap()` call is already near-zero-cost - * from a spawn perspective. - */ - supportsBatching(): boolean { - return false; - } -} - -// ── HeadlessInputUnavailableError ──────────────────────────────────────────── - -/** - * Environment variable that opts in to the focus-stealing AppleScript / CGEvent - * input backend. When unset (the default), `getInputBackend()` refuses to - * instantiate `AppleScriptInputBackend` and throws `HeadlessInputUnavailableError` - * instead, preventing silent focus theft. - */ -export const OPENSAFARI_ALLOW_FOCUS_INPUT_ENV = 'OPENSAFARI_ALLOW_FOCUS_INPUT'; -export const OPENSAFARI_HEADLESS_ONLY_ENV = 'OPENSAFARI_HEADLESS_ONLY'; - -function isFocusInputAllowed(): boolean { - const value = process.env[OPENSAFARI_ALLOW_FOCUS_INPUT_ENV]; - return value === '1' || value === 'true'; -} - -function isHeadlessOnly(): boolean { - const value = process.env[OPENSAFARI_HEADLESS_ONLY_ENV]; - return value === '1' || value === 'true'; -} +export { + HID_TO_WEBKIT_KEY, + SENDKEY_TO_WEBKIT_KEY, +} from '../input/webkit-backend'; -/** - * Thrown by `getInputBackend()` when no headless input method is available and - * the caller has not opted in to the focus-stealing fallback. 
The error carries - * structured fields so MCP clients can surface actionable remediation to users - * without parsing the human-readable message. - */ -export class HeadlessInputUnavailableError extends Error { - readonly name = 'HeadlessInputUnavailableError' as const; - readonly deviceId: string; - readonly reason: - | 'no-simctl' - | 'no-webkit' - | 'webkit-disconnected' - | 'headless-only'; - readonly remediation: readonly string[]; - - constructor( - deviceId: string, - reason: HeadlessInputUnavailableError['reason'], - ) { - const remediation = - reason === 'headless-only' - ? ([ - `${OPENSAFARI_HEADLESS_ONLY_ENV}=1 is set — AppleScript/CGEvent fallback is blocked.`, - 'Ensure a headless backend (simctl, webkit, flutter-vm, simhid) is available.', - `To allow focus-stealing input, unset ${OPENSAFARI_HEADLESS_ONLY_ENV}.`, - ] as const) - : ([ - "Safari QA: call `set_active_context({ context: 'safari' })` to enable WebKitInputBackend", - `Native apps: opt in to the CGEvent fallback by setting ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV}=1 ` + - '(WARNING: will move the mouse cursor and bring Simulator.app to the foreground)', - ] as const); - const message = - `No headless input backend available for device ${deviceId} (reason: ${reason}).\n` + - remediation.map((line) => ` - ${line}`).join('\n'); - super(message); - this.deviceId = deviceId; - this.reason = reason; - this.remediation = remediation; - // Preserve prototype chain across the TypeScript down-compile - Object.setPrototypeOf(this, HeadlessInputUnavailableError.prototype); - } -} - -// ── Backend detection & singleton ──────────────────────────────────────────── +// ── Re-exports: error class + env var constants ─────────────────────────────── -let simctlAvailable: boolean | null = null; -let detectionPromise: Promise | null = null; -let cachedSimctlBackend: SimctlInputBackend | null = null; -let cachedAppleScriptBackend: AppleScriptInputBackend | null = null; -let focusInputOptInWarned = false; +export { + 
HeadlessInputUnavailableError, + OPENSAFARI_ALLOW_FOCUS_INPUT_ENV, + OPENSAFARI_HEADLESS_ONLY_ENV, +} from '../input/backend-resolver'; -// SimulatorKit HID backend cache (Tier 1) -let simHidProbed = false; -let cachedSimHidBackend: InputBackend | null = null; - -// PointerService backend cache (opt-in, Phase 1 of #590) -let pointerServiceProbed = false; -let cachedPointerServiceBackend: InputBackend | null = null; - -// Per-device cache of the Flutter VM client connection so subsequent Tier-0 -// lookups reuse an already-established WebSocket instead of re-running -// discovery on every call. Cleared via `resetInputBackend()`. +// ── Compatibility shim: module-level functions ──────────────────────────────── // -// Value semantics: -// - FlutterVMClient: positive hit (Flutter app connected; reuse) -// - null: negative hit (discovery already failed within NEGATIVE_CACHE_TTL_MS; -// skip discovery and let the caller fall through to Tier 1-3) -interface FlutterClientCacheEntry { - client: FlutterVMClient | null; - expiresAt: number; -} -const flutterClientCache = new Map(); - -// Negative cache TTL: after a failed discovery, don't re-probe for this long. -// Native iOS apps, Safari, and any simulator without a Flutter debug build -// would otherwise pay the full discovery cost on every `getInputBackend()` -// call, stalling tools like `app_scroll_native` / `app_tap` well past their -// unit-test timeouts. -const NEGATIVE_CACHE_TTL_MS = 30_000; +// These delegate to the default singleton InputBackendResolver so all existing +// callers see the same cached state as before. -// Upper bound on how long the initial VM-discovery probe is allowed to block. -// If discovery has not produced a connected client within this window, treat -// the device as non-Flutter so native-app code paths aren't penalised. 
-const DISCOVERY_TIMEOUT_MS = 1_500; +import { defaultResolver } from '../input/backend-resolver'; +import type { FlutterVMResolver } from '../input/flutter-resolver'; +import type { BrowserBackend } from '../types/browser-backend'; +import type { InputBackend } from '../input/backend'; /** - * Overridable resolver that returns a connected `FlutterVMClient` for the - * device, or `null` when no Flutter VM is discoverable (native app, Safari, - * simulator without Flutter debug build). The default implementation is - * swapped out by unit tests via `__setFlutterVMResolverForTest`. + * Get the input backend using the 4-tier fallback strategy. + * Delegates to the default InputBackendResolver singleton. + * + * @param deviceId Simulator UDID + * @param webkitClient Optional WebKit/Safari connection for Tier 2 + * @throws {HeadlessInputUnavailableError} When no headless method is available */ -type FlutterVMResolver = (deviceId: string) => Promise; - -async function defaultFlutterVMResolver( +export async function getInputBackend( deviceId: string, -): Promise { - const now = Date.now(); - const cached = flutterClientCache.get(deviceId); - if (cached && cached.expiresAt > now) { - // Fast path: cached positive hit that is still connected. - if (cached.client && cached.client.isConnected()) { - return cached.client; - } - // Fast path: cached negative hit within TTL. - if (cached.client === null) { - return null; - } - // Stale positive entry (client disconnected). Fall through to re-probe. - } - - // Bound the discovery probe so non-Flutter devices don't stall tools - // that legitimately just want Tier 1-3. - try { - const client = getFlutterVMClient(deviceId); - if (!client.isConnected()) { - let timeoutId: ReturnType | undefined; - const explicitUrl = process.env.OPENSAFARI_VM_SERVICE_URL; - const effectiveTimeout = explicitUrl ? 
10_000 : DISCOVERY_TIMEOUT_MS; - const timeout = new Promise((_, reject) => { - timeoutId = setTimeout( - () => reject(new Error('flutter-vm-discovery-timeout')), - effectiveTimeout, - ); - }); - try { - await Promise.race([client.connect({ deviceId, vmServiceUrl: process.env.OPENSAFARI_VM_SERVICE_URL || undefined }), timeout]); - } finally { - if (timeoutId) clearTimeout(timeoutId); - } - } - if (!client.isConnected()) { - flutterClientCache.set(deviceId, { - client: null, - expiresAt: now + NEGATIVE_CACHE_TTL_MS, - }); - return null; - } - // The VM is reachable, but FlutterVMInputBackend can only drive input - // through `evaluate` — which requires DDS + the frontend compiler - // (debug/profile builds only). Release builds and apps launched via - // `xcrun simctl launch` expose the VM Service socket without the - // compile service, and any `evaluate` call rejects with `code: 113`. - // Probe once up-front so that case falls through to the next tier - // instead of surfacing the raw 113 error to the user. - const probe = await client.probeEvaluateCompile(); - if (!probe.available) { - // Close the orphaned WebSocket — the client is not reusable on negative - // probe, so leaving it in the singleton map leaks a file descriptor per - // discovery cycle on release-mode Flutter apps. - removeFlutterVMClient(deviceId); - if (probe.reason === 'compile-error-113') { - console.error( - `[input-backend] Flutter VM on ${deviceId} rejects evaluate (code 113). ` + - 'Likely a release build or `simctl launch` without `flutter run` — ' + - 'falling back past Tier 0. Set OPENSAFARI_DISABLE_AX_PRESS=0 to use ' + - 'Tier 1.5 for element-targeted taps.', - ); - } - flutterClientCache.set(deviceId, { - client: null, - expiresAt: now + NEGATIVE_CACHE_TTL_MS, - }); - return null; - } - flutterClientCache.set(deviceId, { client, expiresAt: Infinity }); - return client; - } catch { - // VM discovery / connect failures are expected for non-Flutter apps. 
- // Cache the negative result so the next call doesn't pay the probe cost. - flutterClientCache.set(deviceId, { - client: null, - expiresAt: now + NEGATIVE_CACHE_TTL_MS, - }); - return null; - } + webkitClient?: BrowserBackend | null, +): Promise { + return defaultResolver.getInputBackend(deviceId, webkitClient); } -let flutterVMResolver: FlutterVMResolver = defaultFlutterVMResolver; - /** * Attempt to resolve a FlutterVMClient for this device. Returns null whenever - * the device is not running a Flutter app in debug/profile mode. Never - * throws — VM discovery errors collapse to null so the tier fallback keeps - * working for native iOS apps. + * the device is not running a Flutter app in debug/profile mode. Never throws. * * Exposed so callers (e.g. routing diagnostics) can probe availability * without spinning up the backend; the public routing in `getInputBackend()` @@ -890,232 +84,30 @@ let flutterVMResolver: FlutterVMResolver = defaultFlutterVMResolver; */ export async function tryGetFlutterVMClient( deviceId: string, -): Promise { - try { - return await flutterVMResolver(deviceId); - } catch { - return null; - } -} - -/** - * Test seam: override the Flutter VM resolver. `null` restores the default. - * Only used by unit tests — mocking `getFlutterVMClient` module-wide is - * awkward because the singleton map lives inside the module. - */ -export function __setFlutterVMResolverForTest( - resolver: FlutterVMResolver | null, -): void { - flutterVMResolver = resolver ?? defaultFlutterVMResolver; -} -/** - * Probe whether `simctl io input` is available by attempting a no-op tap at (0,0). - * On Xcode 26+ this subcommand was removed and returns exit code 117. 
- */ -async function probeSimctlInput(deviceId: string): Promise { - const simctl = new SimctlExecutor(); - try { - await simctl.exec(['io', deviceId, 'input', 'tap', '0', '0'], { timeout: 5000 }); - return true; - } catch { - console.error( - '[input-backend] simctl io input unavailable (likely Xcode 26+ where this subcommand was removed)', - ); - return false; - } -} - -/** - * Attempt a single WebKit reconnect for a client that exists but reports - * `isConnected() === false`. Returns true if the client is usable after the - * attempt. Never throws — transient failures fall through to Tier 3. - */ -async function tryReconnectWebKit(client: BrowserBackend): Promise { - try { - await client.connect(); - return client.isConnected(); - } catch (err) { - console.error( - `[input-backend] WebKit reconnect attempt failed: ${err instanceof Error ? err.message : String(err)}`, - ); - return false; - } -} - -/** - * Get the input backend using a 4-tier fallback strategy with default-deny - * hardening for the focus-stealing path: - * - * 1. **SimulatorKitHIDInputBackend** — SimulatorKit private API (headless, - * any app, all Xcode versions). Uses `sim-hid-bridge` Swift helper. - * 2. **SimctlInputBackend** — `simctl io input` (headless, any app, Xcode ≤16) - * 3. **WebKitInputBackend** — JS touch events via WebKit protocol (headless, - * Safari only). If the supplied client exists but reports disconnected, - * one reconnect attempt is made before giving up. - * 4. **AppleScriptInputBackend** — CGEvent mouse synthesis, requires - * Simulator window focus. **Default-deny**: only instantiated when - * `OPENSAFARI_ALLOW_FOCUS_INPUT=1` (or `true`) is set in the environment. - * Without opt-in, this function throws `HeadlessInputUnavailableError` - * instead of silently stealing focus. - * - * The simctl probe result is cached for the process lifetime. WebKit - * availability is checked on each call (connection state can change). 
- * - * @param deviceId Simulator UDID - * @param webkitClient Optional WebKit/Safari connection for Tier 2 - * @throws {HeadlessInputUnavailableError} When no headless method is available - * and `OPENSAFARI_ALLOW_FOCUS_INPUT` is not set - */ -export async function getInputBackend( - deviceId: string, - webkitClient?: BrowserBackend | null, -): Promise { - // Tier 0: Flutter VM Service (headless, no focus stealing, no opt-in). - // When the target device is running a Flutter app in debug/profile mode we - // can inject pointer events directly into the Dart isolate, completely - // bypassing OS-level input. Returns null for native iOS apps and silently - // falls through to the existing tiers in that case. - const flutterClient = await tryGetFlutterVMClient(deviceId); - if (flutterClient) { - return new FlutterVMInputBackend(flutterClient); - } - - // Probe simctl once and cache the result - if (simctlAvailable === null) { - if (!detectionPromise) { - detectionPromise = probeSimctlInput(deviceId).then((available) => { - simctlAvailable = available; - return available; - }); - } - await detectionPromise; - } - - // Tier 1 (opt-in): PointerService backend — Phase 1 of #590. - // When OPENSAFARI_ENABLE_POINTERSERVICE=1, route coordinate tap through - // `sim-hid-bridge tap-ps` instead of the default SimHID tap path. Off by - // default; when unset the existing Tier-1 SimHID path is used unchanged. - // The probe runs once and caches the result for the process lifetime. 
- if (isPointerServiceEnabled()) { - if (!pointerServiceProbed) { - pointerServiceProbed = true; - try { - cachedPointerServiceBackend = await tryCreatePointerServiceBackend(); - } catch { - cachedPointerServiceBackend = null; - } - } - if (cachedPointerServiceBackend) { - return cachedPointerServiceBackend; - } - } - - // Tier 1: SimulatorKit HID (headless, works with any app — all Xcode versions) - if (!simHidProbed) { - simHidProbed = true; - try { - cachedSimHidBackend = await tryCreateSimulatorKitHIDBackend(); - } catch { - cachedSimHidBackend = null; - } - } - // Tier 1: SimulatorKit HID — re-enabled after #491 resolved the Xcode 26 - // gesture-recognizer regression. Provides headless coordinate-based - // tap/swipe/scroll for any app (native, Flutter, Safari). - if (cachedSimHidBackend) { - return cachedSimHidBackend; - } - - // Tier 2: simctl io input (headless, works with any app — Xcode ≤16) - if (simctlAvailable) { - if (!cachedSimctlBackend) { - cachedSimctlBackend = new SimctlInputBackend(); - } - return cachedSimctlBackend; - } - - // Tier 2: WebKit JS touch injection (headless, Safari web content only). - // If the client is present but disconnected, try a one-shot reconnect so - // transient drops (proxy restart, tab churn) do not flip us to Tier 3. - if (webkitClient) { - if (webkitClient.isConnected()) { - return new WebKitInputBackend(webkitClient); - } - const reconnected = await tryReconnectWebKit(webkitClient); - if (reconnected) { - return new WebKitInputBackend(webkitClient); - } - } - - // HEADLESS_ONLY safety net — block AppleScript fallback even if opt-in is set. - // This is the CI safety net: when OPENSAFARI_HEADLESS_ONLY=1, any attempt to - // fall through to the focus-stealing backend is a hard error. 
- if (isHeadlessOnly()) { - if (isFocusInputAllowed()) { - console.error( - `[input-backend] ${OPENSAFARI_HEADLESS_ONLY_ENV}=1 overrides ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV} — AppleScript backend disabled`, - ); - } - const reason: HeadlessInputUnavailableError['reason'] = 'headless-only'; - const err = new HeadlessInputUnavailableError(deviceId, reason); - console.error(`[input-backend] ${err.message}`); - throw err; - } - - // Tier 3: AppleScript/CGEvent fallback — DEFAULT-DENY. - // Without explicit opt-in, refuse to return a backend that would move the - // mouse cursor or steal Simulator focus. See issue #405. - if (!isFocusInputAllowed()) { - let reason: HeadlessInputUnavailableError['reason']; - if (!webkitClient) { - reason = 'no-webkit'; - } else { - reason = 'webkit-disconnected'; - } - const err = new HeadlessInputUnavailableError(deviceId, reason); - console.error(`[input-backend] ${err.message}`); - throw err; - } - - if (!focusInputOptInWarned) { - console.error( - `[input-backend] ${OPENSAFARI_ALLOW_FOCUS_INPUT_ENV}=1 is set — ` + - 'AppleScript/CGEvent backend is enabled. ' + - 'This will move the physical mouse cursor and activate Simulator.app.', - ); - focusInputOptInWarned = true; - } - - if (!cachedAppleScriptBackend) { - cachedAppleScriptBackend = new AppleScriptInputBackend(); - } - return cachedAppleScriptBackend; +): Promise { + return defaultResolver.tryGetFlutterVMClient(deviceId); } /** Reset the cached backend state. Exported for testing only. 
*/ export function resetInputBackend(): void { - simctlAvailable = null; - detectionPromise = null; - cachedSimctlBackend = null; - cachedAppleScriptBackend = null; - focusInputOptInWarned = false; - flutterClientCache.clear(); - flutterVMResolver = defaultFlutterVMResolver; - simHidProbed = false; - cachedSimHidBackend = null; - pointerServiceProbed = false; - cachedPointerServiceBackend = null; + defaultResolver.reset(); } /** - * Current number of entries in the Flutter VM discovery cache (includes both - * positive hits and negative entries that have not yet expired). Exposed for - * the cache-budget survey (#554) so `diagnose` can flag this cache when it - * outgrows the budget documented in `docs/memory-budget.md`. + * Current number of entries in the Flutter VM discovery cache. + * Exposed for the cache-budget survey (#554). */ export function getFlutterClientCacheSize(): number { - return flutterClientCache.size; + return defaultResolver.getFlutterClientCacheSize(); } -// Re-export for convenience -export { HID_TO_APPLESCRIPT, SENDKEY_TO_APPLESCRIPT, HID_TO_WEBKIT_KEY, SENDKEY_TO_WEBKIT_KEY }; +/** + * Test seam: override the Flutter VM resolver on the default singleton. + * Pass `null` to restore the default. + * Only used by unit tests. 
+ */ +export function __setFlutterVMResolverForTest( + resolver: FlutterVMResolver | null, +): void { + defaultResolver.setFlutterVMResolver(resolver); +} diff --git a/src/tools/pasteboard-input.ts b/src/tools/pasteboard-input.ts index 1d341207..c1aff909 100644 --- a/src/tools/pasteboard-input.ts +++ b/src/tools/pasteboard-input.ts @@ -30,7 +30,7 @@ import { SimulatorKitHIDInputBackend, tryCreateSimulatorKitHIDBackend, InputBackendError, -} from './sim-hid-input-backend'; +} from '../input/sim-hid-backend'; import { getAccessibilityBridge } from '../native'; import { matchLabel as matchButtonLabel } from './localized-button-matcher'; diff --git a/src/tools/pointer-service-input-backend.ts b/src/tools/pointer-service-input-backend.ts index a2ee1a92..8e84a969 100644 --- a/src/tools/pointer-service-input-backend.ts +++ b/src/tools/pointer-service-input-backend.ts @@ -1,231 +1,17 @@ /** - * PointerServiceInputBackend — Phase 1 opt-in wrapper around the - * `sim-hid-bridge tap-ps` subcommand for Xcode 26+ coordinate tap. + * Compatibility re-export shim for `src/input/pointer-service-backend`. * - * Motivation (issue #590): Apple dropped `IndigoHIDMessageForMouseNSEvent` - * handling in CoreSimulator on Xcode 26. The Tier-1 SimulatorKitHID bare - * mouse path has therefore been gated off for tap/swipe, forcing - * coordinate-based `app_tap` / `app_swipe_native` to fall through to the - * focus-stealing AppleScript / CGEvent backend. The PointerService probe - * shipped in #555 wraps the same mouse events with - * `IndigoHIDMessageToCreatePointerService` / `RemovePointerService` - * brackets; the synthesis doc (#557) falsified this as a *fix*, but it - * remains the most-likely-to-help interim stop-gap because: + * The implementation was moved to `src/input/pointer-service-backend.ts` as + * part of the #707 (b) consolidation. This file re-exports every + * previously-public symbol so existing callers (tests, tools) continue to + * work without modification. 
* - * - It requires no simulator-side changes. - * - Telemetry is cheap to collect under an opt-in flag. - * - If field data shows ≥ 99 % success, #590 Phase 2 promotes it to the - * default Tier-1 path. - * - * Status: **opt-in experimental**. Activated only when - * `OPENSAFARI_ENABLE_POINTERSERVICE=1` is set. Defaults off so CI that - * has already adapted to the AppleScript fallback is unaffected. - * - * Scope: only `tap` is routed through the `tap-ps` subcommand. The Swift - * bridge does not yet expose `swipe-ps`; swipe / typeText / keypress / - * sendKey delegate to the underlying `SimulatorKitHIDInputBackend`, which - * keeps non-tap input paths on their existing (keyboard-safe) Tier-1 - * route. On Xcode 26+ that delegated SimHID swipe path is itself gated - * off and throws `HeadlessInputUnavailableError`; the PointerService - * backend is cached as the selected backend by `getInputBackend`, so the - * throw does NOT re-enter the tier chain. See the comment on `swipe()` - * below and issue #649 for the caller-visible contract. Extending - * `sim-hid-bridge` with pointer-service-bracketed swipe and promoting - * the backend to the default chain are tracked as Phase 2 follow-ups in - * #590. - */ - -import { existsSync } from 'fs'; -import * as path from 'path'; -import type { InputBackend } from './native-input-backend'; -import { - SimulatorKitHIDInputBackend, - InputBackendError, -} from './sim-hid-input-backend'; -import { timedInput } from '../metrics/input-telemetry'; - -/** - * Env flag that enables the PointerService backend (Phase 1 opt-in). - * Accepted values: `1`, `true`. Anything else is ignored. - */ -export const OPENSAFARI_ENABLE_POINTERSERVICE_ENV = - 'OPENSAFARI_ENABLE_POINTERSERVICE'; - -export function isPointerServiceEnabled(): boolean { - const value = process.env[OPENSAFARI_ENABLE_POINTERSERVICE_ENV]; - return value === '1' || value === 'true'; -} - -/** - * PointerService tap + delegated swipe/keys. 
- * - * Composes a `SimulatorKitHIDInputBackend` for the subset of methods that - * still share the default `sim-hid-bridge` subcommand set, and shells out - * directly to `sim-hid-bridge tap-ps ...` for `tap`. - */ -export class PointerServiceInputBackend implements InputBackend { - readonly kind = 'pointer-service' as const; - - constructor( - private readonly bridgePath: string, - private readonly delegate: SimulatorKitHIDInputBackend, - ) {} - - async tap(deviceId: string, x: number, y: number, duration?: number): Promise { - await timedInput(this.kind, 'tap', deviceId, async () => { - const args = [deviceId, 'tap-ps', String(x), String(y)]; - if (duration !== undefined && duration > 0) { - args.push(String(duration)); - } - await runTapPs(this.bridgePath, args); - }); - } - - async swipe( - deviceId: string, - startX: number, - startY: number, - endX: number, - endY: number, - duration?: number, - ): Promise { - // Phase 1: no swipe-ps subcommand exists yet. Delegate straight to the - // underlying SimulatorKitHIDInputBackend. On Xcode 26+ the `sim-hid-bridge - // swipe` subcommand exits with `SIMULATORKIT_UNAVAILABLE` (or another - // non-zero SimulatorKit code), and the delegate surfaces that to the caller - // as an `InputBackendError` — not `HeadlessInputUnavailableError`, which is - // produced one layer up in `native-input-backend.getInputBackend` when - // selecting a backend, never from a backend's own swipe() method. - // - // That error does NOT re-enter the tier chain because - // PointerServiceInputBackend is cached as the selected backend in - // getInputBackend once OPENSAFARI_ENABLE_POINTERSERVICE=1 resolves it. For - // completeness, any other error type the delegate may raise in the future - // is also passed through unchanged; see the "swipe propagates … without - // wrapping" tests for the frozen contract. 
- // - // Callers that need swipe fallback on Xcode 26+ must either (a) leave - // OPENSAFARI_ENABLE_POINTERSERVICE unset so the standard Tier-1 SimHID / - // focus-input chain is selected for every call, or (b) invoke an - // element-targeted swipe so the AX-press tier handles the gesture. - // Promoting this to a real in-tool tier downgrade is tracked under #590 - // Phase 2 alongside `sim-hid-bridge swipe-ps`. See #649 for the decision - // record. - await this.delegate.swipe(deviceId, startX, startY, endX, endY, duration); - } - - async typeText(deviceId: string, text: string): Promise { - await this.delegate.typeText(deviceId, text); - } - - async keypress(deviceId: string, keyCode: string): Promise { - await this.delegate.keypress(deviceId, keyCode); - } - - async sendKey(deviceId: string, keyName: string): Promise { - await this.delegate.sendKey(deviceId, keyName); - } - - /** - * Batching is not supported on PointerServiceInputBackend. The `tap-ps` - * subcommand is Phase 1 experimental (opt-in via - * `OPENSAFARI_ENABLE_POINTERSERVICE=1`); batching is deferred to Phase 2 - * of #590 alongside the `swipe-ps` subcommand. Callers that need repeated - * taps via the pointer-service path must invoke `tap()` in a loop. - */ - supportsBatching(): boolean { - return false; - } -} - -/** - * Execute `sim-hid-bridge` with the given argv. Mirrors the spawn/parse - * contract of `SimulatorKitHIDInputBackend.run` but is narrowed to the - * single `tap-ps` subcommand used by the PointerService backend, so the - * failure-handling path stays local and auditable. + * New consumers should import directly from `../input/pointer-service-backend`. */ -async function runTapPs(bridgePath: string, args: string[]): Promise { - const { execFile } = await import('child_process'); - const { promisify } = await import('util'); - const execFileAsync = promisify(execFile); - - const isSwiftSource = bridgePath.endsWith('.swift'); - const cmd = isSwiftSource ? 
'swift' : bridgePath; - const cmdArgs = isSwiftSource ? [bridgePath, ...args] : args; - - let stdout = ''; - let stderr = ''; - try { - const result = await execFileAsync(cmd, cmdArgs, { - timeout: 10_000, - maxBuffer: 1 * 1024 * 1024, - }); - stdout = result.stdout ?? ''; - stderr = result.stderr ?? ''; - } catch (err) { - const e = err as NodeJS.ErrnoException & { - stdout?: string; - stderr?: string; - code?: number | string; - killed?: boolean; - }; - stdout = e.stdout ?? ''; - stderr = e.stderr ?? ''; - const exit = typeof e.code === 'number' ? e.code : undefined; - const hint = stderr.trim() || stdout.trim() || e.message; - throw new InputBackendError( - `sim-hid-bridge tap-ps exited ${exit ?? '?'}: ${hint}`, - 'UNKNOWN', - stderr, - ); - } - if (!stdout.trim()) return; - try { - const parsed = JSON.parse(stdout) as { ok?: boolean; error?: string }; - if (parsed.ok === false) { - throw new InputBackendError( - parsed.error ?? 'sim-hid-bridge tap-ps reported ok=false', - 'UNKNOWN', - stderr, - ); - } - } catch (err) { - if (err instanceof InputBackendError) throw err; - const safeStdout = stdout - .slice(0, 200) - .replace(/[\x00-\x1f\x7f]/g, '?'); - throw new InputBackendError( - `sim-hid-bridge tap-ps produced non-JSON stdout: ${safeStdout}`, - 'JSON_PARSE_FAILURE', - stderr, - ); - } -} - -/** - * Factory mirroring `tryCreateSimulatorKitHIDBackend`: locate a usable - * `sim-hid-bridge` helper and wrap it as a `PointerServiceInputBackend`. - * Returns `null` when the helper is not installed — callers are expected - * to fall through to the default SimHID tier. 
- */ -export async function tryCreatePointerServiceBackend(): Promise { - const candidates = [ - path.resolve(__dirname, '..', 'sim-hid-bridge'), - path.resolve(__dirname, 'sim-hid-bridge'), - path.resolve(__dirname, '..', 'sim-hid-bridge.swift'), - path.resolve(__dirname, 'sim-hid-bridge.swift'), - ]; - if (process.env.OPENSAFARI_ALLOW_SWIFT_INTERPRETER === '1') { - candidates.push( - path.resolve(__dirname, '..', '..', 'src', 'native', 'sim-hid-bridge.swift'), - ); - } - for (const candidate of candidates) { - if (existsSync(candidate)) { - const delegate = new SimulatorKitHIDInputBackend(candidate); - return new PointerServiceInputBackend(candidate, delegate); - } - } - return null; -} +export { + OPENSAFARI_ENABLE_POINTERSERVICE_ENV, + isPointerServiceEnabled, + PointerServiceInputBackend, + tryCreatePointerServiceBackend, +} from '../input/pointer-service-backend'; diff --git a/src/tools/sim-hid-input-backend.ts b/src/tools/sim-hid-input-backend.ts index d41f0550..11484029 100644 --- a/src/tools/sim-hid-input-backend.ts +++ b/src/tools/sim-hid-input-backend.ts @@ -1,513 +1,19 @@ /** - * SimulatorKitHIDInputBackend — Node wrapper around the `sim-hid-bridge` - * Swift helper described in issue #483. + * Compatibility re-export shim for `src/input/sim-hid-backend`. * - * Status: PoC. Backend class is shipped for integration and unit testing, but - * routing in `native-input-backend.ts` is intentionally NOT wired up yet. See - * the `TODO(#483)` comment there. + * The implementation was moved to `src/input/sim-hid-backend.ts` as part of + * the #707 (b) consolidation. This file re-exports every previously-public + * symbol so existing callers (tests, tools) continue to work without + * modification. * - * The Swift bridge spawns as a short-lived child process and communicates via - * argv (command) + stdout (newline-terminated JSON). 
Exit codes are the - * contract between Swift and Node: - * - * 0 — success - * 64 — BAD_ARGS (EX_USAGE) - * 69 — DEVICE_NOT_BOOTED (EX_UNAVAILABLE) - * 78 — SIMULATORKIT_UNAVAILABLE (EX_CONFIG — dlopen failed) - * 99 — NOT_IMPLEMENTED (PoC stub path) - * * — UNKNOWN (stderr surfaced verbatim) - * - * The current Swift implementation is a PoC stub that proves the dlopen path - * works and always exits with 99 NOT_IMPLEMENTED. This wrapper classifies that - * (and every other documented exit code) into a structured `InputBackendError` - * so the routing layer can decide to fall through to the next tier. - */ - -import { execFile } from 'child_process'; -import { promisify } from 'util'; -import { existsSync } from 'fs'; -import * as path from 'path'; -import type { InputBackend, BatchTapEvent } from './native-input-backend'; -import { timedInput } from '../metrics/input-telemetry'; - -const execFileAsync = promisify(execFile); - -/** Reference appended to error messages for private-framework failures. */ -const PRIVATE_API_DOC_REF = 'See docs/private-apis.md'; - -/** Latch so the private-API warning is emitted only once per process. */ -let warnedAboutPrivateAPI = false; - -/** - * Reset the private-API warning latch. Exported for unit tests only — do not - * call from production code. - */ -export function resetSimHidPrivateAPIWarning(): void { - warnedAboutPrivateAPI = false; -} - -/** Spawn timeout for the Swift helper. Matches idb's default. */ -const SPAWN_TIMEOUT_MS = 10_000; - - -/** HID usage page 0x07 (Keyboard/Keypad) — subset we map for pressKey(). */ -const KEY_NAME_TO_HID_USAGE: Record = { - Enter: 0x28, - Return: 0x28, - Escape: 0x29, - Backspace: 0x2a, - Delete: 0x2a, - Tab: 0x2b, - Space: 0x2c, - ArrowRight: 0x4f, - ArrowLeft: 0x50, - ArrowDown: 0x51, - ArrowUp: 0x52, - Home: 0x4a, -}; - -/** - * HID usage of the LeftShift modifier (Keyboard/Keypad page 0x07). 
- * Sent alongside a character key via the bridge's `key-mod` subcommand for - * every ASCII symbol that requires Shift on a US keyboard (uppercase letters, - * `!@#$%^&*()_+{}|:"<>?~`). - */ -const HID_USAGE_LEFT_SHIFT = 0xe1; - -/** - * US-keyboard printable ASCII → HID usage + whether Shift must be held. - * - * Covers U+0020 (space) through U+007E (tilde) — i.e. every character produced - * by a US layout without dead keys or IME. Returns null for everything else, - * including control characters (tab, newline), DEL, and any non-ASCII byte. - * - * Reference: USB HID Usage Tables v1.21, §10 Keyboard/Keypad (page 0x07). - */ -function asciiToHidKey(ch: string): { usage: number; shift: boolean } | null { - if (ch.length !== 1) return null; - const code = ch.charCodeAt(0); - if (code < 0x20 || code > 0x7e) return null; - // Lowercase letters → HID 0x04..0x1D - if (code >= 0x61 && code <= 0x7a) return { usage: 0x04 + (code - 0x61), shift: false }; - // Uppercase letters → same keys, but Shift is required - if (code >= 0x41 && code <= 0x5a) return { usage: 0x04 + (code - 0x41), shift: true }; - // Digits '1'..'9' → 0x1E..0x26 - if (code >= 0x31 && code <= 0x39) return { usage: 0x1e + (code - 0x31), shift: false }; - if (code === 0x30) return { usage: 0x27, shift: false }; // '0' - switch (ch) { - case ' ': return { usage: 0x2c, shift: false }; - case '-': return { usage: 0x2d, shift: false }; - case '_': return { usage: 0x2d, shift: true }; - case '=': return { usage: 0x2e, shift: false }; - case '+': return { usage: 0x2e, shift: true }; - case '[': return { usage: 0x2f, shift: false }; - case '{': return { usage: 0x2f, shift: true }; - case ']': return { usage: 0x30, shift: false }; - case '}': return { usage: 0x30, shift: true }; - case '\\': return { usage: 0x31, shift: false }; - case '|': return { usage: 0x31, shift: true }; - case ';': return { usage: 0x33, shift: false }; - case ':': return { usage: 0x33, shift: true }; - case "'": return { usage: 0x34, 
shift: false }; - case '"': return { usage: 0x34, shift: true }; - case '`': return { usage: 0x35, shift: false }; - case '~': return { usage: 0x35, shift: true }; - case ',': return { usage: 0x36, shift: false }; - case '<': return { usage: 0x36, shift: true }; - case '.': return { usage: 0x37, shift: false }; - case '>': return { usage: 0x37, shift: true }; - case '/': return { usage: 0x38, shift: false }; - case '?': return { usage: 0x38, shift: true }; - case '!': return { usage: 0x1e, shift: true }; - case '@': return { usage: 0x1f, shift: true }; - case '#': return { usage: 0x20, shift: true }; - case '$': return { usage: 0x21, shift: true }; - case '%': return { usage: 0x22, shift: true }; - case '^': return { usage: 0x23, shift: true }; - case '&': return { usage: 0x24, shift: true }; - case '*': return { usage: 0x25, shift: true }; - case '(': return { usage: 0x26, shift: true }; - case ')': return { usage: 0x27, shift: true }; - } - return null; -} - -/** - * Error emitted by `SimulatorKitHIDInputBackend`. Mirrors the convention used - * by `AccessibilityBridgeError` (see `src/native/accessibility-bridge.ts`): - * a stable machine-readable `code` plus the human-readable `message`. + * New consumers should import directly from `../input/sim-hid-backend`. */ -export class InputBackendError extends Error { - readonly name = 'InputBackendError' as const; - constructor( - message: string, - public readonly code: InputBackendErrorCode, - public readonly stderr?: string, - ) { - super(message); - Object.setPrototypeOf(this, InputBackendError.prototype); - } -} -export type InputBackendErrorCode = - | 'BAD_ARGS' - | 'DEVICE_NOT_BOOTED' - | 'SIMULATORKIT_UNAVAILABLE' - | 'NOT_IMPLEMENTED' - | 'SPAWN_TIMEOUT' - | 'BRIDGE_NOT_FOUND' - | 'HID_BRIDGE_MISSING' - | 'JSON_PARSE_FAILURE' - | 'UNKNOWN'; - -/** Map Swift bridge exit codes to structured error codes. 
*/ -function codeForExit(exit: number | undefined): InputBackendErrorCode { - switch (exit) { - case 64: return 'BAD_ARGS'; - case 69: return 'DEVICE_NOT_BOOTED'; - case 78: return 'SIMULATORKIT_UNAVAILABLE'; - case 99: return 'NOT_IMPLEMENTED'; - default: return 'UNKNOWN'; - } -} - -/** - * SimulatorKit HID input backend. Spawns `sim-hid-bridge` per call and parses - * the JSON status envelope. All methods throw `InputBackendError` on failure. - */ -export class SimulatorKitHIDInputBackend implements InputBackend { - readonly kind = 'simhid' as const; - - constructor(private readonly bridgePath: string) {} - - async tap(deviceId: string, x: number, y: number, duration?: number): Promise { - await timedInput(this.kind, 'tap', deviceId, async () => { - const args = [deviceId, 'tap', String(x), String(y)]; - if (duration !== undefined && duration > 0) { - args.push(String(duration)); - } - await this.run(args); - }); - } - - async swipe( - deviceId: string, - startX: number, - startY: number, - endX: number, - endY: number, - duration?: number, - ): Promise { - await timedInput(this.kind, 'swipe', deviceId, async () => { - const args = [ - deviceId, 'swipe', - String(startX), String(startY), - String(endX), String(endY), - ]; - if (duration !== undefined && duration > 0) { - args.push(String(duration)); - } - await this.run(args); - }); - } - - async typeText(deviceId: string, text: string, delayMs = 0): Promise { - await timedInput(this.kind, 'typeText', deviceId, async () => { - // Printable US-ASCII only. Each character is mapped to a US-keyboard - // HID usage and sent as an independent event. Shifted characters - // (uppercase letters, symbols like `@!#$%^&*()_+{}|:"<>?~`) are sent - // via the bridge's `key-mod` subcommand which holds LeftShift around - // the key press. Tab, newline, DEL, and non-ASCII characters have no - // mapping and are rejected; higher layers should compose those via - // WebKit/Flutter/simctl backends instead. 
- // - // When delayMs > 0 an inter-character pause is inserted between - // consecutive key sends. This is required for segmented OTP-style - // inputs (e.g. 6-cell verify-code fields in Flutter) that drop - // characters when keys arrive in rapid succession (issue #639). - let first = true; - for (const ch of text) { - const key = asciiToHidKey(ch); - if (key === null) { - throw new InputBackendError( - `SimulatorKitHIDInputBackend.typeText: unsupported character '${ch}' ` + - '(no HID mapping). Only printable US-ASCII (U+0020..U+007E) is ' + - 'supported; tab, newline, and non-ASCII characters are not. ' + - 'Track follow-up in issue #483.', - 'BAD_ARGS', - ); - } - if (!first && delayMs > 0) { - await sleep(delayMs); - } - first = false; - if (key.shift) { - await this.run([ - deviceId, - 'key-mod', - String(key.usage), - String(HID_USAGE_LEFT_SHIFT), - ]); - } else { - await this.run([deviceId, 'key', String(key.usage)]); - } - } - }); - } - - async keypress(deviceId: string, keyCode: string): Promise { - await timedInput(this.kind, 'keypress', deviceId, async () => { - // Accept either a decimal HID usage code or a key name known to our map. - const parsed = Number.parseInt(keyCode, 10); - const usage = Number.isNaN(parsed) ? KEY_NAME_TO_HID_USAGE[keyCode] : parsed; - if (usage === undefined) { - throw new InputBackendError( - `SimulatorKitHIDInputBackend.keypress: unknown HID key code "${keyCode}"`, - 'BAD_ARGS', - ); - } - await this.run([deviceId, 'key', String(usage)]); - }); - } - - async sendKey(deviceId: string, keyName: string): Promise { - await timedInput(this.kind, 'sendKey', deviceId, async () => { - const usage = KEY_NAME_TO_HID_USAGE[keyName]; - if (usage === undefined) { - throw new InputBackendError( - `SimulatorKitHIDInputBackend.pressKey: unknown key "${keyName}". 
` + - `Supported: ${Object.keys(KEY_NAME_TO_HID_USAGE).join(', ')}`, - 'BAD_ARGS', - ); - } - await this.run([deviceId, 'key', String(usage)]); - }); - } - - /** Convenience alias: resolve a symbolic key name to its HID usage. */ - async pressKey(deviceId: string, key: string): Promise { - await this.sendKey(deviceId, key); - } - - /** - * SimulatorKitHIDInputBackend supports batching. Each `sim-hid-bridge` - * invocation is a short-lived child-process spawn (~10–50 ms of OS - * overhead on a typical macOS host). When a caller needs to dispatch N - * taps in quick succession (e.g. rapidly filling a PIN pad), using - * `tapBatch()` allows it to express intent at the logical-batch level - * rather than calling `tap()` N times in a loop — which is identical at - * the wire level but explicitly communicates that the events form a unit. - * Future optimisations (e.g. a single-spawn batch subcommand in the - * Swift bridge, if added later) can be transparently wired in here - * without changing callers. - * - * **Limitation**: only tap events are supported. Batching swipe, key, or - * key-mod events is not implemented because the use-case is less - * frequent and the bridge does not yet expose a multi-command subcommand - * that covers those event types. See issue #705 for the follow-up scope. - */ - supportsBatching(): boolean { - return true; - } - - /** - * Dispatch multiple tap events sequentially. Events are sent in order; - * if any event fails the batch stops and rejects with that error. - * - * Spawn count before this API: N calls × 1 spawn each = N spawns. - * Spawn count after using tapBatch: N spawns (same at the bridge level, - * because the Swift bridge handles one command per process). The benefit - * is reduced caller overhead (no repeated `getInputBackend()` resolution, - * no per-event telemetry scaffolding outside the batch boundary) and a - * clear extension point when the bridge gains a batch subcommand. 
- * - * **Not supported** for swipe, typeText, keypress, or sendKey — those - * operations use separate bridge subcommands that are not yet batched. - * Callers that mix event types must call the individual methods directly. - */ - async tapBatch(deviceId: string, events: BatchTapEvent[]): Promise { - for (const event of events) { - await this.tap(deviceId, event.x, event.y, event.duration); - } - } - - /** - * Press `keyUsage` while holding `modifierUsage` (e.g. Cmd+V = keyChord(25, 227)). - * Wraps the bridge's `key-mod` subcommand so callers can compose chords - * without shelling out manually. Used by the pasteboard typing path. - */ - async keyChord( - deviceId: string, - keyUsage: number, - modifierUsage: number, - ): Promise { - await timedInput(this.kind, 'keyChord', deviceId, async () => { - await this.run([ - deviceId, - 'key-mod', - String(keyUsage), - String(modifierUsage), - ]); - }); - } - - /** - * Spawn the bridge with the given argv (not including the bridge path) - * and parse its JSON stdout. Surfaces every documented exit code as a - * structured `InputBackendError`. - */ - private async run(args: string[]): Promise { - if (!warnedAboutPrivateAPI) { - warnedAboutPrivateAPI = true; - console.error( - '[opensafari] SimulatorKitHIDInputBackend uses private Apple frameworks ' + - '(SimulatorKit.framework, CoreSimulator.framework) via dlopen. ' + - 'These APIs are undocumented and Xcode updates may break them. ' + - 'Where can I use this? macOS host / CI only — never bundle inside an ' + - 'iOS .ipa shipped to the App Store or TestFlight. ' + - PRIVATE_API_DOC_REF + - ' (see "Deployment scope").', - ); - } - const { cmd, cmdArgs } = this.resolveSpawn(args); - let stdout = ''; - let stderr = ''; - try { - const result = await execFileAsync(cmd, cmdArgs, { - timeout: SPAWN_TIMEOUT_MS, - maxBuffer: 1 * 1024 * 1024, - }); - stdout = result.stdout ?? ''; - stderr = result.stderr ?? 
''; - } catch (err) { - const e = err as NodeJS.ErrnoException & { - stdout?: string; - stderr?: string; - code?: number | string; - killed?: boolean; - }; - stdout = e.stdout ?? ''; - stderr = e.stderr ?? ''; - - if (e.killed && e.code === null) { - throw new InputBackendError( - `sim-hid-bridge timed out after ${SPAWN_TIMEOUT_MS}ms`, - 'SPAWN_TIMEOUT', - stderr, - ); - } - - const exit = typeof e.code === 'number' ? e.code : undefined; - const classified = codeForExit(exit); - const hint = stderr.trim() || stdout.trim() || e.message; - // Attach the private-APIs doc pointer to every SimulatorKit-layer - // failure so MCP clients / CI logs link directly to the BC-break - // response playbook rather than surfacing a bare exit code. - const docSuffix = - classified === 'SIMULATORKIT_UNAVAILABLE' || classified === 'NOT_IMPLEMENTED' - ? ` (${PRIVATE_API_DOC_REF})` - : ''; - throw new InputBackendError( - `sim-hid-bridge exited ${exit ?? '?'}: ${hint}${docSuffix}`, - classified, - stderr, - ); - } - - // Successful spawn: parse the JSON envelope. A bridge that exits 0 but - // emits `{ ok: false, ... }` is treated as a structured failure too. - if (!stdout.trim()) { - return {}; - } - try { - const parsed = JSON.parse(stdout) as { ok?: boolean; error?: string; code?: string }; - if (parsed.ok === false) { - const okFalseCode = (parsed.code as InputBackendErrorCode | undefined) ?? 'UNKNOWN'; - const frameworkFailureCodes = new Set([ - 'SIMULATORKIT_MISSING', - 'CORESIMULATOR_MISSING', - 'HID_CLIENT_FAILED', - 'HID_FUNCTIONS_MISSING', - ]); - const okFalseDocSuffix = frameworkFailureCodes.has(parsed.code ?? '') ? ` (${PRIVATE_API_DOC_REF})` : ''; - throw new InputBackendError( - `${parsed.error ?? 
'sim-hid-bridge reported ok=false'}${okFalseDocSuffix}`, - okFalseCode, - stderr, - ); - } - return parsed; - } catch (err) { - if (err instanceof InputBackendError) throw err; - const safeStdout = stdout - .slice(0, 200) - // Strip ASCII control / DEL so a crafted bridge payload can't inject - // ANSI escapes or JSON-RPC framing into MCP server logs. - .replace(/[\x00-\x1f\x7f]/g, '?'); - throw new InputBackendError( - `sim-hid-bridge produced non-JSON stdout: ${safeStdout}`, - 'JSON_PARSE_FAILURE', - stderr, - ); - } - } - - /** - * Decide how to invoke the bridge: as a compiled binary, or via the `swift` - * interpreter when only the .swift source is present (PoC fallback). - */ - private resolveSpawn(args: string[]): { cmd: string; cmdArgs: string[] } { - if (this.bridgePath.endsWith('.swift')) { - return { cmd: 'swift', cmdArgs: [this.bridgePath, ...args] }; - } - return { cmd: this.bridgePath, cmdArgs: args }; - } -} - -/** - * Attempt to locate a usable sim-hid-bridge. Returns a ready-to-use backend - * or `null` if the helper is not installed on this machine. Callers are - * expected to fall through to another tier in that case. - * - * Lookup order: - * 1. Compiled binary at `dist/sim-hid-bridge` (next to `dist/ax-bridge`). - * 2. Swift source at `dist/sim-hid-bridge.swift` (post-build copy). - * 3. Source tree fallback at `src/native/sim-hid-bridge.swift` — DEV ONLY, - * gated behind `OPENSAFARI_ALLOW_SWIFT_INTERPRETER=1`. The repo-relative - * path escapes `dist/` when the package is installed as a dependency, - * and executing unsigned Swift source via the interpreter sidesteps any - * future codesigning we add to the compiled binary, so this candidate - * is intentionally NOT auto-discovered in production installs. - */ -export async function tryCreateSimulatorKitHIDBackend(): Promise< - SimulatorKitHIDInputBackend | null -> { - const candidates = [ - // Compiled binary co-located with ax-bridge after build. 
- path.resolve(__dirname, '..', 'sim-hid-bridge'), - path.resolve(__dirname, 'sim-hid-bridge'), - // Swift source copied into dist/ by the postbuild step. - path.resolve(__dirname, '..', 'sim-hid-bridge.swift'), - path.resolve(__dirname, 'sim-hid-bridge.swift'), - ]; - if (process.env.OPENSAFARI_ALLOW_SWIFT_INTERPRETER === '1') { - candidates.push( - path.resolve(__dirname, '..', '..', 'src', 'native', 'sim-hid-bridge.swift'), - ); - } - for (const candidate of candidates) { - if (existsSync(candidate)) { - return new SimulatorKitHIDInputBackend(candidate); - } - } - const searched = candidates.map((c) => ` - ${c}`).join('\n'); - throw new InputBackendError( - `sim-hid-bridge not found. Searched:\n${searched}\n` + - 'Run npm run build or set OPENSAFARI_ALLOW_SWIFT_INTERPRETER=1 for dev mode.', - 'HID_BRIDGE_MISSING', - ); -} +export { + resetSimHidPrivateAPIWarning, + InputBackendError, + SimulatorKitHIDInputBackend, + tryCreateSimulatorKitHIDBackend, +} from '../input/sim-hid-backend'; -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} +export type { InputBackendErrorCode } from '../input/sim-hid-backend'; diff --git a/tests/unit/audit-logger.test.ts b/tests/unit/audit-logger.test.ts index 7d3033fb..a26d7420 100644 --- a/tests/unit/audit-logger.test.ts +++ b/tests/unit/audit-logger.test.ts @@ -140,6 +140,10 @@ describe('audit logger', () => { value: 'selected-secret-option', label: 'safe label', }, + items: [ + { text: 'password typed into field' }, + { value: 'token pasted into selector' }, + ], }); const [entry] = readAuditEntries(tmpHome); @@ -148,9 +152,15 @@ describe('audit logger', () => { expect(serialized).not.toContain('typed-secret-password'); expect(serialized).not.toContain('selected-secret-option'); + expect(serialized).not.toContain('password typed into field'); + expect(serialized).not.toContain('token pasted into selector'); expect(summary.text).toBe('[REDACTED]'); expect((summary.nested as 
Record).value).toBe('[REDACTED]'); expect((summary.nested as Record).label).toBe('safe label'); + expect(summary.items).toEqual([ + { text: '[REDACTED]' }, + { value: '[REDACTED]' }, + ]); }); it('retries log target setup after a transient initialization error', () => { diff --git a/tests/unit/input-backend-resolver.test.ts b/tests/unit/input-backend-resolver.test.ts new file mode 100644 index 00000000..2ffef335 --- /dev/null +++ b/tests/unit/input-backend-resolver.test.ts @@ -0,0 +1,309 @@ +/** + * Unit tests for InputBackendResolver class (#707 a). + * + * Verifies: + * - Instance state is independent across instances + * - reset() clears all caches + * - Fallback order matches the documented tier chain + */ + +import { InputBackendResolver } from '../../src/input/backend-resolver'; +import { SimctlInputBackend } from '../../src/input/simctl-backend'; +import { AppleScriptInputBackend } from '../../src/input/applescript-backend'; +import { WebKitInputBackend } from '../../src/input/webkit-backend'; +import { FlutterVMInputBackend } from '../../src/tools/flutter-vm-input-backend'; + +// ── Mocks ────────────────────────────────────────────────────────────────── + +/* eslint-disable no-var */ +var execMock = jest.fn().mockResolvedValue(''); +/* eslint-enable no-var */ + +jest.mock('../../src/simulator/simctl', () => ({ + SimctlExecutor: jest.fn().mockImplementation(() => ({ + exec: execMock, + })), +})); + +jest.mock('child_process', () => ({ execFile: jest.fn() })); +jest.mock('util', () => ({ + ...jest.requireActual('util'), + promisify: () => (...args: unknown[]) => execMock(...args), +})); + +// ── Helpers ──────────────────────────────────────────────────────────────── + +const DEVICE = 'TEST-RESOLVER-UDID'; +const DEVICE_B = 'TEST-RESOLVER-UDID-B'; + +const nullFlutterResolver = async () => null; + +// ── Instance isolation ───────────────────────────────────────────────────── + +describe('InputBackendResolver — instance isolation', () => { + beforeEach(() => 
{ + execMock.mockClear(); + }); + + test('two instances have independent simctl probe caches', async () => { + const resolverA = new InputBackendResolver(); + const resolverB = new InputBackendResolver(); + resolverA.setFlutterVMResolver(nullFlutterResolver); + resolverB.setFlutterVMResolver(nullFlutterResolver); + + // A: simctl succeeds + execMock.mockResolvedValueOnce(''); // probe A + const backendA = await resolverA.getInputBackend(DEVICE); + expect(backendA).toBeInstanceOf(SimctlInputBackend); + + // B: simctl fails + execMock.mockRejectedValueOnce(new Error('not supported')); + process.env['OPENSAFARI_ALLOW_FOCUS_INPUT'] = '1'; + const backendB = await resolverB.getInputBackend(DEVICE_B); + process.env['OPENSAFARI_ALLOW_FOCUS_INPUT'] = undefined; + delete process.env['OPENSAFARI_ALLOW_FOCUS_INPUT']; + + // A still returns simctl (cached), B returns applescript + expect(backendA).toBeInstanceOf(SimctlInputBackend); + expect(backendB).toBeInstanceOf(AppleScriptInputBackend); + }); + + test('two instances have independent Flutter VM caches', async () => { + const resolverA = new InputBackendResolver(); + const resolverB = new InputBackendResolver(); + + const fakeClient = { isConnected: () => true, evaluate: jest.fn() } as any; + + // A gets Flutter, B gets null (falls through to simctl) + resolverA.setFlutterVMResolver(async () => fakeClient); + resolverB.setFlutterVMResolver(nullFlutterResolver); + + const backendA = await resolverA.getInputBackend(DEVICE); + execMock.mockResolvedValueOnce(''); // B's simctl probe + const backendB = await resolverB.getInputBackend(DEVICE); + + expect(backendA).toBeInstanceOf(FlutterVMInputBackend); + expect(backendB).toBeInstanceOf(SimctlInputBackend); + }); + + test('Flutter cache size is per-instance', async () => { + const resolverA = new InputBackendResolver(); + const resolverB = new InputBackendResolver(); + resolverA.setFlutterVMResolver(nullFlutterResolver); + resolverB.setFlutterVMResolver(nullFlutterResolver); + + 
execMock.mockResolvedValue(''); + await resolverA.getInputBackend(DEVICE); + await resolverA.getInputBackend(DEVICE_B); + + // A made 2 probes (one per device); B has not been used + // Flutter cache is distinct from simctl; sizes may be 0 here since + // resolver returns null without caching on our stub + expect(resolverA.getFlutterClientCacheSize()).toBe(0); + expect(resolverB.getFlutterClientCacheSize()).toBe(0); + }); +}); + +// ── reset() behavior ────────────────────────────────────────────────────── + +describe('InputBackendResolver.reset()', () => { + beforeEach(() => { + execMock.mockClear(); + }); + + afterEach(() => { + delete process.env['OPENSAFARI_ALLOW_FOCUS_INPUT']; + }); + + test('reset() clears simctl detection cache — probe runs again after reset', async () => { + const resolver = new InputBackendResolver(); + resolver.setFlutterVMResolver(nullFlutterResolver); + + execMock.mockResolvedValue(''); + await resolver.getInputBackend(DEVICE); + const callsBeforeReset = execMock.mock.calls.length; + + resolver.reset(); + resolver.setFlutterVMResolver(nullFlutterResolver); + execMock.mockResolvedValue(''); + await resolver.getInputBackend(DEVICE); + + // Probe must have run again after reset + expect(execMock.mock.calls.length).toBeGreaterThan(callsBeforeReset); + }); + + test('reset() clears cached simctl backend singleton — new instance returned', async () => { + const resolver = new InputBackendResolver(); + resolver.setFlutterVMResolver(nullFlutterResolver); + + execMock.mockResolvedValue(''); + const first = await resolver.getInputBackend(DEVICE); + + resolver.reset(); + resolver.setFlutterVMResolver(nullFlutterResolver); + execMock.mockResolvedValue(''); + const second = await resolver.getInputBackend(DEVICE); + + expect(first).toBeInstanceOf(SimctlInputBackend); + expect(second).toBeInstanceOf(SimctlInputBackend); + // After reset the singleton is re-created — different object reference + expect(first).not.toBe(second); + }); + + test('reset() 
clears focusInputOptInWarned so warning fires again', async () => { + const resolver = new InputBackendResolver(); + resolver.setFlutterVMResolver(nullFlutterResolver); + process.env['OPENSAFARI_ALLOW_FOCUS_INPUT'] = '1'; + + const spy = jest.spyOn(console, 'error').mockImplementation(() => {}); + + execMock.mockRejectedValueOnce(new Error('not supported')); + await resolver.getInputBackend(DEVICE); + + const warnsBefore = spy.mock.calls.filter((c) => + String(c[0]).includes('AppleScript/CGEvent backend is enabled'), + ).length; + + resolver.reset(); + resolver.setFlutterVMResolver(nullFlutterResolver); + execMock.mockRejectedValueOnce(new Error('not supported')); + await resolver.getInputBackend(DEVICE); + + const warnsAfter = spy.mock.calls.filter((c) => + String(c[0]).includes('AppleScript/CGEvent backend is enabled'), + ).length; + + expect(warnsBefore).toBe(1); + expect(warnsAfter).toBe(2); // warning fired again after reset + spy.mockRestore(); + }); + + test('reset() clears Flutter VM resolver override', async () => { + const resolver = new InputBackendResolver(); + const fakeClient = { isConnected: () => true, evaluate: jest.fn() } as any; + resolver.setFlutterVMResolver(async () => fakeClient); + + const before = await resolver.getInputBackend(DEVICE); + expect(before).toBeInstanceOf(FlutterVMInputBackend); + + // After reset() the resolver is re-stubbed with nullFlutterResolver, so resolution + // falls through to the simctl probe that is mocked to succeed below. + // NOTE(review): the re-stub means this checks post-reset fallthrough, not override removal itself — confirm intent. 
+ resolver.reset(); + resolver.setFlutterVMResolver(nullFlutterResolver); + execMock.mockResolvedValueOnce(''); + const after = await resolver.getInputBackend(DEVICE); + expect(after).toBeInstanceOf(SimctlInputBackend); + }); +}); + +// ── Fallback order (asserted, not just commented) ───────────────────────── + +describe('InputBackendResolver — fallback order', () => { + let resolver: InputBackendResolver; + + beforeEach(() => { + execMock.mockClear(); + resolver = new InputBackendResolver(); + resolver.setFlutterVMResolver(nullFlutterResolver); + delete process.env['OPENSAFARI_ALLOW_FOCUS_INPUT']; + delete process.env['OPENSAFARI_HEADLESS_ONLY']; + }); + + afterEach(() => { + delete process.env['OPENSAFARI_ALLOW_FOCUS_INPUT']; + delete process.env['OPENSAFARI_HEADLESS_ONLY']; + }); + + test('Tier 0 (Flutter VM) wins over all lower tiers', async () => { + const fakeClient = { isConnected: () => true, evaluate: jest.fn() } as any; + resolver.setFlutterVMResolver(async () => fakeClient); + + const mockWebKit = { isConnected: () => true } as any; + const backend = await resolver.getInputBackend(DEVICE, mockWebKit); + + expect(backend).toBeInstanceOf(FlutterVMInputBackend); + expect(backend.kind).toBe('flutter-vm'); + // simctl probe must NOT run + expect(execMock).not.toHaveBeenCalled(); + }); + + test('Tier 2 simctl wins over WebKit when simctl probe succeeds', async () => { + execMock.mockResolvedValueOnce(''); // simctl probe succeeds + const mockWebKit = { isConnected: () => true } as any; + const backend = await resolver.getInputBackend(DEVICE, mockWebKit); + + expect(backend).toBeInstanceOf(SimctlInputBackend); + expect(backend.kind).toBe('simctl'); + }); + + test('Tier 2 WebKit used when simctl probe fails and client is connected', async () => { + execMock.mockRejectedValueOnce(new Error('not supported')); + const mockWebKit = { isConnected: () => true } as any; + const backend = await resolver.getInputBackend(DEVICE, mockWebKit); + + 
expect(backend).toBeInstanceOf(WebKitInputBackend); + expect(backend.kind).toBe('webkit'); + }); + + test('Tier 3 AppleScript used when simctl and WebKit both fail (opt-in set)', async () => { + process.env['OPENSAFARI_ALLOW_FOCUS_INPUT'] = '1'; + execMock.mockRejectedValueOnce(new Error('not supported')); + + const backend = await resolver.getInputBackend(DEVICE); + + expect(backend).toBeInstanceOf(AppleScriptInputBackend); + expect(backend.kind).toBe('applescript'); + }); + + test('throws HeadlessInputUnavailableError when no headless backend and no opt-in', async () => { + execMock.mockRejectedValueOnce(new Error('not supported')); + const { HeadlessInputUnavailableError } = await import('../../src/input/backend-resolver'); + + await expect(resolver.getInputBackend(DEVICE)).rejects.toBeInstanceOf( + HeadlessInputUnavailableError, + ); + }); + + test('Tier 0 → Tier 2 simctl fallback when Flutter resolver returns null', async () => { + resolver.setFlutterVMResolver(nullFlutterResolver); + execMock.mockResolvedValueOnce(''); + + const backend = await resolver.getInputBackend(DEVICE); + + expect(backend).toBeInstanceOf(SimctlInputBackend); + expect(backend.kind).toBe('simctl'); + }); + + test('tier order: Tier 0 > Tier 2 simctl > Tier 2 WebKit > Tier 3 AppleScript', async () => { + // Each step builds a fresh resolver and records the `kind` of the backend it resolves. 
+ const kindResults: string[] = []; + + // Step 1: Flutter (Tier 0) + const r0 = new InputBackendResolver(); + const fakeClient = { isConnected: () => true, evaluate: jest.fn() } as any; + r0.setFlutterVMResolver(async () => fakeClient); + kindResults.push((await r0.getInputBackend(DEVICE)).kind); + + // Step 2: Simctl (Tier 2) — Flutter null, simctl succeeds + const r2s = new InputBackendResolver(); + r2s.setFlutterVMResolver(nullFlutterResolver); + execMock.mockResolvedValueOnce(''); + kindResults.push((await r2s.getInputBackend(DEVICE)).kind); + + // Step 3: WebKit (Tier 2) — Flutter null, simctl fails, webkit connected + const r2w = new InputBackendResolver(); + r2w.setFlutterVMResolver(nullFlutterResolver); + execMock.mockRejectedValueOnce(new Error('not supported')); + kindResults.push((await r2w.getInputBackend(DEVICE, { isConnected: () => true } as any)).kind); + + // Step 4: AppleScript (Tier 3) — Flutter null, simctl fails, no webkit, opt-in + process.env['OPENSAFARI_ALLOW_FOCUS_INPUT'] = '1'; + const r3 = new InputBackendResolver(); + r3.setFlutterVMResolver(nullFlutterResolver); + execMock.mockRejectedValueOnce(new Error('not supported')); + kindResults.push((await r3.getInputBackend(DEVICE)).kind); + + expect(kindResults).toEqual(['flutter-vm', 'simctl', 'webkit', 'applescript']); + }); +}); diff --git a/tests/unit/memory-budget.test.ts b/tests/unit/memory-budget.test.ts index 05589309..1c3e48d9 100644 --- a/tests/unit/memory-budget.test.ts +++ b/tests/unit/memory-budget.test.ts @@ -79,7 +79,7 @@ const CONSTANT_CONTRACTS: Array<{ value: 100, }, { - relPath: 'src/tools/native-input-backend.ts', + relPath: 'src/input/flutter-resolver.ts', name: 'NEGATIVE_CACHE_TTL_MS', value: 30_000, }, diff --git a/tests/unit/native-input-backend.test.ts b/tests/unit/native-input-backend.test.ts index 472bef1e..1f5032f2 100644 --- a/tests/unit/native-input-backend.test.ts +++ b/tests/unit/native-input-backend.test.ts @@ -573,7 +573,8 @@ describe('getInputBackend', () => 
{ const originalEnv = process.env[OPENSAFARI_ALLOW_FOCUS_INPUT_ENV]; beforeEach(() => { - execMock.mockClear(); + execMock.mockReset(); + execMock.mockResolvedValue(''); resetInputBackend(); delete process.env[OPENSAFARI_ALLOW_FOCUS_INPUT_ENV]; // Ensure Tier-0 routing does not accidentally grab the real @@ -595,7 +596,7 @@ describe('getInputBackend', () => { expect(backend).toBeInstanceOf(SimctlInputBackend); expect(backend.kind).toBe('simctl'); expect(execMock).toHaveBeenCalledWith( - ['io', DEVICE, 'input', 'tap', '0', '0'], + ['help', 'io'], { timeout: 5000 }, ); }); @@ -687,13 +688,11 @@ describe('getInputBackend', () => { }); test('returns SimHID backend when probe succeeds (no webkitClient)', async () => { - execMock.mockRejectedValueOnce(new Error('not supported')); const backend = await getInputBackend(DEVICE); expect(backend.kind).toBe('simhid'); }); test('returns SimHID backend ahead of WebKit when probe succeeds', async () => { - execMock.mockRejectedValueOnce(new Error('not supported')); const mockClient = { isConnected: jest.fn().mockReturnValue(true), connect: jest.fn().mockResolvedValue(undefined), @@ -728,28 +727,24 @@ describe('getInputBackend', () => { test('returns pointer-service backend when opt-in flag is set', async () => { process.env[ENABLE_PS] = '1'; - execMock.mockRejectedValueOnce(new Error('not supported')); const backend = await getInputBackend(DEVICE); expect(backend.kind).toBe('pointer-service'); }); test('falls through to SimHID when opt-in flag is unset', async () => { delete process.env[ENABLE_PS]; - execMock.mockRejectedValueOnce(new Error('not supported')); const backend = await getInputBackend(DEVICE); expect(backend.kind).toBe('simhid'); }); test('falls through to SimHID when opt-in flag has any non-truthy value', async () => { process.env[ENABLE_PS] = 'maybe'; - execMock.mockRejectedValueOnce(new Error('not supported')); const backend = await getInputBackend(DEVICE); expect(backend.kind).toBe('simhid'); }); test('returns 
pointer-service even when a WebKit client is attached (tier order preserved)', async () => { process.env[ENABLE_PS] = '1'; - execMock.mockRejectedValueOnce(new Error('not supported')); const mockClient = { isConnected: jest.fn().mockReturnValue(true), connect: jest.fn().mockResolvedValue(undefined),