diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..6f01eac9 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,108 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Oracle (`@steipete/oracle`) is a CLI tool that wraps OpenAI's Responses API to query multiple AI models (GPT-5.x, Gemini 3.x, Claude 4.x) with file context. It supports API mode, browser automation (ChatGPT/Gemini via Chrome DevTools Protocol), MCP server integration, and remote bridge execution. + +## Commands + +```bash +# Package manager: pnpm (10.23.0) +pnpm install # Install dependencies + +# Build +pnpm run build # TypeScript compile + copy vendor files + +# Lint & Format (oxlint + oxfmt, NOT ESLint/Prettier) +pnpm run check # format:check + lint (runs in CI) +pnpm run lint # typecheck + oxlint +pnpm run lint:fix # oxlint --fix + oxfmt +pnpm run format # oxfmt --write +pnpm run typecheck # tsc --noEmit + +# Tests (Vitest) +pnpm test # Run all unit tests +pnpm vitest run tests/oracle/run.test.ts # Single test file +pnpm vitest run -t "test name pattern" # Single test by name +pnpm test:coverage # Unit tests with v8 coverage +pnpm test:mcp # Build + MCP unit + mcporter integration +pnpm test:browser # Browser automation smokes (needs Chrome on port 45871) +ORACLE_LIVE_TEST=1 pnpm test:live # Live API tests (costs real tokens) +ORACLE_LIVE_TEST=1 pnpm test:pro # Pro model tests (10+ min) +``` + +## Architecture + +``` +bin/ + oracle-cli.ts # CLI entry point (commander-based, 1700+ lines) + oracle-mcp.ts # MCP server entry point + +src/ + oracle/ # Core engine + run.ts # Main orchestrator — assembles prompt, calls API, streams response + client.ts # API client factory (OpenAI, Azure, Gemini, custom endpoints) + modelResolver.ts # Model name → provider routing logic + files.ts # File globbing + token estimation + multiModelRunner.ts # Parallel multi-model execution + gemini.ts / claude.ts # 
Provider-specific adapters + + browser/ # Chrome DevTools Protocol automation + index.ts # Core browser orchestrator (largest file) + chromeLifecycle.ts # Chrome launch/teardown via chrome-launcher + cookies.ts # Cookie sync (sweet-cookie for macOS Keychain) + reattach.ts # Session recovery on navigation/crash + actions/ # DOM interaction modules + assistantResponse.ts # Capture AI response from page + attachments.ts # File/image upload automation + promptComposer.ts # Type prompt into chat input + modelSelection.ts # Pick model from ChatGPT dropdown + navigation.ts # URL/iframe handling + providers/ # DOM selector definitions per site + chatgptDomProvider.ts + geminiDeepThinkDomProvider.ts + + cli/ # CLI layer + options.ts # Commander option definitions + sessionRunner.ts # Executes a single oracle run + sessionDisplay.ts # Terminal output rendering + browserConfig.ts # Browser flag aggregation + tui/ # Interactive terminal UI (excluded from coverage) + + gemini-web/ # Browser-based Gemini client (no API key needed) + remote/ # Remote Chrome bridge (server + client) + bridge/ # MCP/Codex bridge connection + mcp/ # Model Context Protocol server + tools + sessionManager.ts # Session CRUD (stored in ~/.oracle/sessions/) + config.ts # Global config (~/.oracle/config.json, JSON5) +``` + +### Key Patterns + +- **Engine selection**: API (default when `OPENAI_API_KEY` set) vs Browser (Chrome automation). Controlled by `--engine api|browser` or `ORACLE_ENGINE` env var. +- **Model routing**: `modelResolver.ts` maps model strings to providers. Supports OpenAI, Azure OpenAI, Gemini (API + web), Claude, OpenRouter, Grok, and custom endpoints. +- **Session persistence**: Every run creates a session under `~/.oracle/sessions/<id>/` with metadata, prompt, and response. Sessions can be listed (`oracle status`), replayed (`oracle session <id>`), or restarted (`oracle restart <id>`). 
+- **Path aliases**: `@src/*` → `src/*`, `@tests/*` → `tests/*` (configured in tsconfig.json and vitest.config.ts). + +## Code Style + +- **Formatter**: oxfmt — 2 spaces, 100 char width, double quotes, trailing commas, semicolons. +- **Linter**: oxlint with plugins: unicorn, typescript, oxc. Categories correctness/perf/suspicious = error. +- **TypeScript**: Strict mode, ES2022 target, ESNext modules, bundler resolution. +- **Module system**: ESM (`"type": "module"` in package.json). Use `.ts` extensions in imports. + +## Testing Notes + +- Test setup (`tests/setup-env.ts`) injects fake API keys and isolates session storage to `/tmp/oracle-tests-{pid}`. Non-live tests never hit real APIs. +- Live tests are opt-in via `ORACLE_LIVE_TEST=1` env var and require real API keys. +- Browser smoke tests expect Chrome on DevTools port 45871. +- MCP tests require building first (`pnpm run build`). + +## AGENTS.md Highlights + +- CLI banner uses the oracle emoji: `🧿 oracle () ...` — only on initial headline and TUI exit. +- Browser Pro runs: never click "Answer now" — wait for the real response (up to 10 min). +- Before release, check `docs/manual-tests.md` for relevant smoke tests. +- After finishing a feature, update CHANGELOG if it affects end users (read top ~100 lines first, group related edits). diff --git a/bin/oracle-cli.ts b/bin/oracle-cli.ts index f8768328..6073995d 100755 --- a/bin/oracle-cli.ts +++ b/bin/oracle-cli.ts @@ -142,6 +142,7 @@ interface CliOptions extends OptionValues { browserManualLogin?: boolean; browserManualLoginProfileDir?: string; browserThinkingTime?: "light" | "standard" | "extended" | "heavy"; + deepResearch?: boolean; browserAllowCookieErrors?: boolean; browserAttachments?: string; browserInlineFiles?: boolean; @@ -592,6 +593,13 @@ program .choices(["light", "standard", "extended", "heavy"]) .hideHelp(), ) + .option( + "--deep-research", + "Use ChatGPT Deep Research mode (browser engine only). 
" + + "Activates autonomous web research that takes 5-30 minutes. " + + "Requires ChatGPT Plus or Pro subscription.", + false, + ) .addOption( new Option( "--browser-allow-cookie-errors", @@ -1328,6 +1336,17 @@ async function runRootCommand(options: CliOptions): Promise { options.baseUrl = userConfig.apiBaseUrl; } + // --deep-research implies browser engine and validates constraints + if (options.deepResearch) { + if (engine !== "browser" && preferredEngine === "api") { + throw new Error("--deep-research requires --engine browser."); + } + engine = "browser"; + if (options.models && options.models.length > 0) { + throw new Error("--deep-research cannot be combined with --models (multi-model runs)."); + } + } + if (remoteHost && engine !== "browser") { throw new Error("--remote-host requires --engine browser."); } diff --git a/docs/deep-research-plan/00-overview.md b/docs/deep-research-plan/00-overview.md new file mode 100644 index 00000000..c5e37a20 --- /dev/null +++ b/docs/deep-research-plan/00-overview.md @@ -0,0 +1,117 @@ +# Deep Research Browser Automation — Implementation Plan + +## Goal + +Add ChatGPT Deep Research support to Oracle's browser automation engine, enabling users to trigger Deep Research from the CLI and receive structured research reports — all using their existing ChatGPT subscription (no API cost). 
+ +## Motivation + +- ChatGPT Deep Research is a powerful autonomous research agent that browses the web for 5-30 minutes and produces comprehensive cited reports +- OpenAI offers a Deep Research API (`o3-deep-research`, `o4-mini-deep-research`), but it costs ~$10/M input + $40/M output tokens per run +- Users with ChatGPT Plus/Pro subscriptions already have Deep Research included — browser automation lets them use it programmatically at no extra cost +- Oracle already has mature ChatGPT browser automation; extending it for Deep Research is a natural fit + +## Usage + +```bash +# Basic Deep Research +oracle --deep-research -p "Research the latest trends in AI agent frameworks in 2026" + +# With file context +oracle --deep-research -p "Analyze this codebase architecture" --file "src/**/*.ts" + +# With custom timeout (default 40 minutes) +oracle --deep-research --timeout 60m -p "Comprehensive market analysis of EV industry" +``` + +## Architecture Decision: Iframe Handling + +The research plan confirmation UI renders in a **cross-origin iframe** (640x400px), making direct DOM manipulation from the main page impossible. Three options were evaluated: + +| Option | Approach | Complexity | Robustness | +|--------|----------|------------|------------| +| **A. Wait for auto-confirm** | Start button has ~60s countdown that auto-confirms | Low | High | +| B. CDP iframe targeting | Use `Target.getTargets()` to find iframe execution context | High | Medium | +| C. Coordinate-based clicking | Use `Input.dispatchMouseEvent` at computed coordinates | Medium | Low | + +**Decision: Option A.** The auto-confirm countdown eliminates the need to interact with the iframe at all. After detecting the iframe appears, simply wait ~70 seconds for auto-confirmation. This is the most robust approach and matches natural user behavior. 
+ +## Implementation Phases + +| Phase | Scope | Doc | +|-------|-------|-----| +| 1 | Types, Config, CLI Flag | [01-types-and-config.md](01-types-and-config.md) | +| 2 | Core Action Module (`deepResearch.ts`) | [02-core-actions.md](02-core-actions.md) | +| 3 | Main Flow Integration (`index.ts`) | [03-flow-integration.md](03-flow-integration.md) | +| 4 | Reattach & Session Support | [04-reattach-and-sessions.md](04-reattach-and-sessions.md) | +| 5 | Testing Strategy | [05-testing.md](05-testing.md) | + +## UI Flow (Discovered via Live Exploration) + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Phase 1: Activate Deep Research Mode │ +│ │ +│ [+] button → radix dropdown → "Deep research" item │ +│ Result: "Thinking" pill → "Deep research" pill │ +│ + "Apps" and "Sites" buttons appear │ +└────────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────────▼────────────────────────────────────┐ +│ Phase 2: Submit Prompt │ +│ │ +│ Type prompt in textbox → click [send-button] │ +│ URL changes to /c/{conversation-id} │ +└────────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────────▼────────────────────────────────────┐ +│ Phase 3: Research Plan (CROSS-ORIGIN IFRAME) │ +│ │ +│ ┌──────────────────────────────────────┐ │ +│ │ "AI agent frameworks trends" │ │ +│ │ ○ Survey academic papers... │ │ +│ │ ○ Review documentation... │ │ +│ │ ○ Analyze blog posts... │ │ +│ │ │ │ +│ │ [Edit] [Cancel] [Start (53)] │ │ +│ └──────────────────────────────────────┘ │ +│ Auto-confirms after ~60 second countdown │ +└────────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────────▼────────────────────────────────────┐ +│ Phase 4: Research Execution (5-30 minutes) │ +│ │ +│ Status updates in iframe: "Researching..." │ +│ "Considering methods for framework comparison..." 
│ +│ [Update] button visible in iframe │ +└────────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────────▼────────────────────────────────────┐ +│ Phase 5: Report Complete │ +│ │ +│ Iframe disappears, full markdown report in conversation │ +│ Copy/Rate buttons appear (FINISHED_ACTIONS_SELECTOR) │ +│ Extract text via existing assistantResponse.ts │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Key DOM Selectors + +| Element | Selector | Notes | +|---------|----------|-------| +| "+" button | `[data-testid="composer-plus-btn"]` | Opens radix dropdown | +| Deep Research menu item | `[data-radix-collection-item]` text="Deep research" | No `data-testid` | +| Deep Research pill | `.__composer-pill-composite` with aria "Deep research" | Replaces Thinking pill | +| Send button | `[data-testid="send-button"]` | Same as normal chat | +| Research plan iframe | `iframe.h-full.w-full` inside assistant turn | Cross-origin | +| Completion indicator | `FINISHED_ACTIONS_SELECTOR` (copy/rate buttons) | Existing constant | + +## Risks and Mitigations + +| Risk | Mitigation | +|------|-----------| +| ChatGPT changes Deep Research UI selectors | Use text-match "Deep research" as primary; multiple fallback selectors | +| Auto-confirm timer changes | Detect confirmation via iframe state change, not fixed timer | +| Research exceeds timeout | Default 40min timeout; `--timeout` override; reattach mechanism for interrupted runs | +| "+" button `data-testid` changes | Fallback: `button[aria-label*="Add files"]`, positional matching | +| Deep Research unavailable for account tier | Clear error message with subscription requirement info | diff --git a/docs/deep-research-plan/01-types-and-config.md b/docs/deep-research-plan/01-types-and-config.md new file mode 100644 index 00000000..e676aa9d --- /dev/null +++ b/docs/deep-research-plan/01-types-and-config.md @@ -0,0 +1,186 @@ +# Phase 1: Types, Config, and CLI Flag + +## Overview + 
+Add the `deepResearch` boolean flag throughout the configuration pipeline: TypeScript types → browser config defaults → CLI option → session persistence. + +## File Changes + +### 1.1 `src/browser/types.ts` + +Add `deepResearch` to `BrowserAutomationConfig` (after `thinkingTime` on line 61): + +```typescript +/** Enable ChatGPT Deep Research mode (browser engine only). */ +deepResearch?: boolean; +``` + +Add to `ResolvedBrowserConfig` (after `thinkingTime` on line 113): + +```typescript +deepResearch?: boolean; +``` + +Add to `BrowserRunOptions` (after `verbose` on line 75): + +```typescript +/** Whether this is a Deep Research run (affects timeouts and response detection). */ +deepResearch?: boolean; +``` + +### 1.2 `src/browser/config.ts` + +In `DEFAULT_BROWSER_CONFIG`, add: + +```typescript +deepResearch: false, +``` + +In `resolveBrowserConfig`, propagate the value and override timeouts when Deep Research is active: + +```typescript +const deepResearch = config.deepResearch ?? false; + +// Deep Research runs take 5-30 minutes; use generous default timeout +const effectiveTimeoutMs = deepResearch && !config.timeoutMs + ? 
2_400_000 // 40 minutes + : resolvedTimeoutMs; +``` + +### 1.3 `src/browser/constants.ts` + +Add a new section for Deep Research selectors: + +```typescript +// Deep Research selectors +export const DEEP_RESEARCH_PLUS_BUTTON = '[data-testid="composer-plus-btn"]'; +export const DEEP_RESEARCH_DROPDOWN_ITEM_TEXT = 'Deep research'; +export const DEEP_RESEARCH_PILL_LABEL = 'Deep research'; +// Polling interval for Deep Research completion (5 seconds) +export const DEEP_RESEARCH_POLL_INTERVAL_MS = 5_000; +// Auto-confirm wait time (countdown ~60s + 10s safety margin) +export const DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS = 70_000; +// Default timeout for Deep Research completion (40 minutes) +export const DEEP_RESEARCH_DEFAULT_TIMEOUT_MS = 2_400_000; +``` + +### 1.4 `src/sessionStore.ts` (or `src/sessionManager.ts`) + +Add `deepResearch?: boolean` to `BrowserSessionConfig`: + +```typescript +export interface BrowserSessionConfig { + // ... existing fields ... + thinkingTime?: ThinkingTimeLevel; + deepResearch?: boolean; // <-- add +} +``` + +This ensures the session can be resumed with the correct Deep Research flag. + +### 1.5 `src/config.ts` (top-level) + +Add `deepResearch?: boolean` to `BrowserConfigDefaults`: + +```typescript +export interface BrowserConfigDefaults { + // ... existing fields ... + thinkingTime?: ThinkingTimeLevel; + deepResearch?: boolean; // <-- add +} +``` + +### 1.6 `src/cli/browserConfig.ts` + +Add to `BrowserFlagOptions`: + +```typescript +deepResearch?: boolean; +``` + +In `buildBrowserConfig`, propagate: + +```typescript +deepResearch: options.deepResearch ?? defaults?.deepResearch ?? false, +``` + +When `deepResearch` is true, override model strategy: + +```typescript +// Deep Research has its own mode; skip model picker interaction +if (deepResearch) { + modelStrategy = 'ignore'; +} +``` + +### 1.7 `src/cli/browserDefaults.ts` + +In `applyBrowserDefaultsFromConfig`, read from config file: + +```typescript +deepResearch: config.deepResearch ?? 
undefined, +``` + +### 1.8 `bin/oracle-cli.ts` + +Register the CLI flag (after `--browser-thinking-time`): + +```typescript +.addOption( + new Option( + "--deep-research", + "Use ChatGPT Deep Research mode (browser engine only). " + + "Activates autonomous web research that takes 5-30 minutes. " + + "Requires ChatGPT Plus or Pro subscription." + ).default(false) +) +``` + +In the options processing logic: + +```typescript +// --deep-research implies browser engine +if (options.deepResearch) { + if (!options.engine) { + options.engine = 'browser'; + } + if (options.engine !== 'browser') { + console.error('--deep-research requires --engine browser'); + process.exit(1); + } +} +``` + +Wire to browser config: + +```typescript +const browserConfig = buildBrowserConfig({ + // ... existing options ... + deepResearch: options.deepResearch, +}); +``` + +### 1.9 `~/.oracle/config.json` Support + +Users can set Deep Research as default in their config: + +```json5 +{ + "deepResearch": false, + // When deepResearch is true, these defaults make sense: + "timeout": "40m", + "engine": "browser" +} +``` + +## Validation Rules + +1. `--deep-research` is mutually exclusive with `--models` (multi-model runs don't make sense for Deep Research) +2. `--deep-research` forces `engine: "browser"` — API mode does not support this feature +3. `--deep-research` skips model selection (`modelStrategy: "ignore"`) +4. `--deep-research` skips thinking time selection (Deep Research replaces the Thinking pill) +5. When `--deep-research` is active and no explicit `--timeout` is given, default to 40 minutes + +## Dependencies + +None — this phase is pure type/config plumbing with no runtime logic. 
diff --git a/docs/deep-research-plan/02-core-actions.md b/docs/deep-research-plan/02-core-actions.md new file mode 100644 index 00000000..d9fd17cf --- /dev/null +++ b/docs/deep-research-plan/02-core-actions.md @@ -0,0 +1,487 @@ +# Phase 2: Core Action Module — `deepResearch.ts` + +## Overview + +Create `src/browser/actions/deepResearch.ts` — the core automation logic for activating, monitoring, and completing Deep Research runs. This module follows the same pattern as `thinkingTime.ts` and `modelSelection.ts`. + +## New File: `src/browser/actions/deepResearch.ts` + +### Imports + +```typescript +import type { ChromeClient, BrowserLogger } from "../types.js"; +import { + DEEP_RESEARCH_PLUS_BUTTON, + DEEP_RESEARCH_DROPDOWN_ITEM_TEXT, + DEEP_RESEARCH_PILL_LABEL, + DEEP_RESEARCH_POLL_INTERVAL_MS, + DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS, + DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, + FINISHED_ACTIONS_SELECTOR, + STOP_BUTTON_SELECTOR, +} from "../constants.js"; +import { delay } from "../utils.js"; +import { buildClickDispatcher } from "./domEvents.js"; +import { BrowserAutomationError } from "../../oracle/errors.js"; +``` + +### Function 1: `activateDeepResearch` + +Activates the Deep Research mode by clicking the "+" button and selecting "Deep research" from the dropdown. 
+ +```typescript +export async function activateDeepResearch( + Runtime: ChromeClient["Runtime"], + Input: ChromeClient["Input"], + logger: BrowserLogger, +): Promise<void> +``` + +**DOM Expression Logic** (runs inside Chrome via `Runtime.evaluate`): + +``` +Step 1: Find and click [data-testid="composer-plus-btn"] + - Fallback: button with aria-label containing "Add files" + - If not found, throw "composer-plus-btn not found" + +Step 2: Wait for radix dropdown to appear + - Poll for [data-radix-collection-item] elements (up to 3s) + - If no dropdown items found, throw "dropdown did not open" + +Step 3: Find item with text "Deep research" + - Iterate [data-radix-collection-item] elements + - Match textContent.trim() === "Deep research" (case-insensitive fallback) + - If not found, collect all item texts and throw descriptive error: + "Deep research option not found in dropdown. Available: [Create image, Shopping research, ...]" + +Step 4: Click the Deep Research item + - Use dispatchClickSequence for React compatibility + +Step 5: Verify activation + - Poll for Deep Research pill in composer (up to 5s) + - Check: .__composer-pill-composite with aria-label containing "Deep research" + - Alternative: button text "Deep research" in composer footer area + - If pill not found, throw "Deep Research mode did not activate" +``` + +**Implementation Pattern** — following `ensureThinkingTime` from `thinkingTime.ts`: + +```typescript +export async function activateDeepResearch( + Runtime: ChromeClient["Runtime"], + _Input: ChromeClient["Input"], + logger: BrowserLogger, +): Promise<void> { + const expression = buildActivateDeepResearchExpression(); + const outcome = await Runtime.evaluate({ + expression, + awaitPromise: true, + returnByValue: true, + }); + const result = outcome.result?.value as + | { status: "activated" } + | { status: "already-active" } + | { status: "plus-button-missing" } + | { status: "dropdown-item-missing"; available?: string[] } + | { status: "pill-not-confirmed" } 
+ | undefined; + + switch (result?.status) { + case "activated": + logger("Deep Research mode activated"); + return; + case "already-active": + logger("Deep Research mode already active"); + return; + case "plus-button-missing": + throw new BrowserAutomationError( + "Could not find the composer plus button to activate Deep Research.", + { stage: "deep-research-activate", code: "plus-button-missing" }, + ); + case "dropdown-item-missing": { + const hint = result.available?.length + ? ` Available options: ${result.available.join(", ")}` + : ""; + throw new BrowserAutomationError( + `"Deep research" option not found in composer dropdown.${hint} ` + + "This feature may require a ChatGPT Plus or Pro subscription.", + { stage: "deep-research-activate", code: "dropdown-item-missing" }, + ); + } + case "pill-not-confirmed": + throw new BrowserAutomationError( + "Deep Research pill did not appear after selection. The UI may have changed.", + { stage: "deep-research-activate", code: "pill-not-confirmed" }, + ); + default: + throw new BrowserAutomationError( + "Unexpected result from Deep Research activation.", + { stage: "deep-research-activate" }, + ); + } +} +``` + +**DOM Expression Builder**: + +```typescript +function buildActivateDeepResearchExpression(): string { + const plusBtnSelector = JSON.stringify(DEEP_RESEARCH_PLUS_BUTTON); + const targetText = JSON.stringify(DEEP_RESEARCH_DROPDOWN_ITEM_TEXT); + const pillLabel = JSON.stringify(DEEP_RESEARCH_PILL_LABEL); + + return `(async () => { + ${buildClickDispatcher()} + + // Step 0: Check if already active + const existingPill = document.querySelector('.__composer-pill-composite'); + if (existingPill) { + const pillText = existingPill.textContent?.trim() || ''; + const pillAria = existingPill.querySelector('button')?.getAttribute('aria-label') || ''; + if (pillText.toLowerCase().includes('deep research') || + pillAria.toLowerCase().includes('deep research')) { + return { status: 'already-active' }; + } + } + + // Step 1: 
Click plus button + const plusBtn = document.querySelector(${plusBtnSelector}) || + Array.from(document.querySelectorAll('button')).find( + b => (b.getAttribute('aria-label') || '').toLowerCase().includes('add files') + ); + if (!plusBtn) return { status: 'plus-button-missing' }; + dispatchClickSequence(plusBtn); + + // Step 2: Wait for dropdown + const waitForDropdown = () => new Promise((resolve) => { + let elapsed = 0; + const tick = () => { + const items = document.querySelectorAll('[data-radix-collection-item]'); + if (items.length > 0) { resolve(items); return; } + elapsed += 150; + if (elapsed > 3000) { resolve(null); return; } + setTimeout(tick, 150); + }; + setTimeout(tick, 150); + }); + const items = await waitForDropdown(); + if (!items) return { status: 'dropdown-item-missing', available: [] }; + + // Step 3: Find "Deep research" item + const target = ${targetText}.toLowerCase(); + let match = null; + const available = []; + for (const item of items) { + const text = (item.textContent || '').trim(); + available.push(text); + if (text.toLowerCase() === target) { + match = item; + } + } + if (!match) return { status: 'dropdown-item-missing', available }; + + // Step 4: Click it + dispatchClickSequence(match); + + // Step 5: Verify pill appeared + const waitForPill = () => new Promise((resolve) => { + let elapsed = 0; + const tick = () => { + const pills = document.querySelectorAll('.__composer-pill-composite'); + for (const pill of pills) { + const text = pill.textContent?.trim() || ''; + const aria = pill.querySelector('button')?.getAttribute('aria-label') || ''; + if (text.toLowerCase().includes('deep research') || + aria.toLowerCase().includes('deep research')) { + resolve(true); return; + } + } + elapsed += 200; + if (elapsed > 5000) { resolve(false); return; } + setTimeout(tick, 200); + }; + setTimeout(tick, 200); + }); + const pillConfirmed = await waitForPill(); + return pillConfirmed ? 
{ status: 'activated' } : { status: 'pill-not-confirmed' }; + })()`; +} +``` + +### Function 2: `waitForResearchPlanAutoConfirm` + +After prompt submission, waits for the research plan to appear and auto-confirm. + +```typescript +export async function waitForResearchPlanAutoConfirm( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + autoConfirmWaitMs: number = DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS, +): Promise<void> +``` + +**Logic**: + +``` +1. Poll (every 2s, up to 60s) for research plan indicator: + - Check for iframe in assistant response area + - Check for text containing "research plan" or status like "Researching..." + - The plan appears quickly after submit (usually within 5-10s) + +2. Once plan detected: + - Log "Research plan generated, waiting for auto-confirm..." + - Wait autoConfirmWaitMs (default 70s) + - During this wait, periodically check if already confirmed + (iframe might disappear early if user clicks Start manually) + +3. After wait, verify research has started: + - Check for active research indicators ("Researching...", status text changes) + - If still showing plan once autoConfirmWaitMs has elapsed, log warning but continue + +4. Log "Research started, monitoring progress..." 
+``` + +**Implementation**: + +```typescript +export async function waitForResearchPlanAutoConfirm( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + autoConfirmWaitMs: number = DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS, +): Promise<void> { + // Phase A: Detect research plan appearance + const planDeadline = Date.now() + 60_000; // 60s to see plan + let planDetected = false; + + while (Date.now() < planDeadline) { + const { result } = await Runtime.evaluate({ + expression: `(() => { + // Check for iframe (research plan container) + const iframes = document.querySelectorAll('iframe'); + const hasResearchIframe = Array.from(iframes).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + // Check for research status text in assistant area + const assistantText = (document.querySelector('[data-message-author-role="assistant"]')?.textContent || '').toLowerCase(); + const hasResearchText = assistantText.includes('researching') || + assistantText.includes('research plan') || + assistantText.includes('survey') || + assistantText.includes('analyze'); + return { hasResearchIframe, hasResearchText }; + })()`, + returnByValue: true, + }); + + const val = result?.value as { hasResearchIframe?: boolean; hasResearchText?: boolean } | undefined; + if (val?.hasResearchIframe || val?.hasResearchText) { + planDetected = true; + logger("Research plan detected, waiting for auto-confirm countdown..."); + break; + } + await delay(2_000); + } + + if (!planDetected) { + logger("Warning: Research plan not detected within 60s; continuing (may have auto-confirmed already)"); + return; + } + + // Phase B: Wait for auto-confirm + const confirmStart = Date.now(); + while (Date.now() - confirmStart < autoConfirmWaitMs) { + // Check if research already started (early confirmation) + const { result } = await Runtime.evaluate({ + expression: `(() => { + const iframes = document.querySelectorAll('iframe'); + const hasLargeIframe = 
Array.from(iframes).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + const text = (document.body?.innerText || '').toLowerCase(); + const isResearching = text.includes('researching...') || + text.includes('reading sources') || + text.includes('considering'); + return { hasLargeIframe, isResearching }; + })()`, + returnByValue: true, + }); + const val = result?.value as { hasLargeIframe?: boolean; isResearching?: boolean } | undefined; + + // If research status text appeared, the plan was confirmed + if (val?.isResearching) { + logger("Research plan confirmed, execution started"); + return; + } + + await delay(5_000); + } + + logger("Auto-confirm wait complete, proceeding to monitor research progress"); +} +``` + +### Function 3: `waitForDeepResearchCompletion` + +Polls for Deep Research completion over 5-30 minutes. + +```typescript +export async function waitForDeepResearchCompletion( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + timeoutMs: number = DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, +): Promise<{ + text: string; + html?: string; + meta: { turnId?: string | null; messageId?: string | null }; +}> +``` + +**Logic**: + +``` +Poll every DEEP_RESEARCH_POLL_INTERVAL_MS (5s), up to timeoutMs: + +1. Check for FINISHED_ACTIONS_SELECTOR (copy/thumbs buttons) + → Definitive completion signal + +2. Check for stop button + → Still generating = research in progress + +3. Read assistant response text length + → Track progress, detect stalls + +4. Every 60s, log status update: + "Deep Research in progress... {elapsed}s elapsed, {textLength} chars so far" + +5. On completion: + → Extract full text via existing captureAssistantMarkdown/readAssistantSnapshot + → Return structured result + +6. 
On timeout: + → Throw BrowserAutomationError with stage: "deep-research-timeout" + → Include partial text and runtime metadata for reattach +``` + +**Implementation**: + +```typescript +export async function waitForDeepResearchCompletion( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + timeoutMs: number = DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, +): Promise<{ + text: string; + html?: string; + meta: { turnId?: string | null; messageId?: string | null }; +}> { + const start = Date.now(); + let lastLogTime = start; + let lastTextLength = 0; + const finishedSelector = JSON.stringify(FINISHED_ACTIONS_SELECTOR); + const stopSelector = JSON.stringify(STOP_BUTTON_SELECTOR); + + logger(`Monitoring Deep Research (timeout: ${Math.round(timeoutMs / 60_000)}min)...`); + + while (Date.now() - start < timeoutMs) { + const { result } = await Runtime.evaluate({ + expression: `(() => { + const finished = Boolean(document.querySelector(${finishedSelector})); + const stopVisible = Boolean(document.querySelector(${stopSelector})); + // Get assistant response text length + const turns = document.querySelectorAll('[data-message-author-role="assistant"]'); + const lastTurn = turns[turns.length - 1]; + const textLength = (lastTurn?.textContent || '').length; + // Check for iframe (research still in plan/execution phase) + const hasIframe = Array.from(document.querySelectorAll('iframe')).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + return { finished, stopVisible, textLength, hasIframe }; + })()`, + returnByValue: true, + }); + + const val = result?.value as { + finished?: boolean; + stopVisible?: boolean; + textLength?: number; + hasIframe?: boolean; + } | undefined; + + // Completion detected + if (val?.finished) { + logger(`Deep Research completed (${Math.round((Date.now() - start) / 1000)}s elapsed)`); + // Extract the full response using existing patterns + return await extractDeepResearchResult(Runtime, logger); 
+ } + + // Progress logging every 60 seconds + const now = Date.now(); + if (now - lastLogTime >= 60_000) { + const elapsed = Math.round((now - start) / 1000); + const chars = val?.textLength ?? 0; + const phase = val?.hasIframe ? "researching" : val?.stopVisible ? "generating" : "waiting"; + logger(`Deep Research ${phase}... ${elapsed}s elapsed, ~${chars} chars`); + lastLogTime = now; + } + + lastTextLength = val?.textLength ?? lastTextLength; + await delay(DEEP_RESEARCH_POLL_INTERVAL_MS); + } + + // Timeout — no partial text is extracted here; throw with elapsed time and last observed text length so the run can be reattached + const elapsed = Math.round((Date.now() - start) / 1000); + throw new BrowserAutomationError( + `Deep Research did not complete within ${Math.round(timeoutMs / 60_000)} minutes (${elapsed}s elapsed). ` + + `Use 'oracle session <id>' to reattach later, or increase --timeout.`, + { + stage: "deep-research-timeout", + code: "deep-research-timeout", + elapsedMs: Date.now() - start, + lastTextLength, + }, + ); +} +``` + +### Helper: `extractDeepResearchResult` + +Reuses existing assistant response extraction logic: + +```typescript +async function extractDeepResearchResult( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, +): Promise<{ + text: string; + html?: string; + meta: { turnId?: string | null; messageId?: string | null }; +}> { + // Use the same extraction logic as waitForAssistantResponse + // but target the last assistant turn which contains the Deep Research report + // ... delegates to existing captureAssistantMarkdown / readAssistantSnapshot +} +``` + +## Exports + +```typescript +export { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, +}; +``` + +## Key Design Decisions + +1. **Text matching for "Deep research"**: Use exact text match (`textContent.trim() === "Deep research"`) with case-insensitive fallback. More robust than relying on missing `data-testid`. + +2. 
**Auto-confirm over iframe interaction**: The ~60s countdown auto-confirms the research plan. 
This avoids the complexity of cross-origin iframe DOM manipulation entirely. + +3. **5-second poll interval**: Longer than normal response polling (100ms) because Deep Research runs for minutes. Reduces unnecessary CDP calls while still detecting completion promptly. + +4. **Progress logging every 60s**: Keeps the user informed during the long wait without spamming output. + +5. **Reuse existing extraction**: The final Deep Research report is standard markdown in the conversation. `captureAssistantMarkdown` and `readAssistantSnapshot` from `assistantResponse.ts` should work unchanged. diff --git a/docs/deep-research-plan/03-flow-integration.md b/docs/deep-research-plan/03-flow-integration.md new file mode 100644 index 00000000..9e803868 --- /dev/null +++ b/docs/deep-research-plan/03-flow-integration.md @@ -0,0 +1,231 @@ +# Phase 3: Main Flow Integration — `index.ts` + +## Overview + +Wire the Deep Research actions into Oracle's main browser automation flow in `src/browser/index.ts`. The changes insert Deep Research activation before prompt submission and replace the standard response waiting with Deep Research-specific monitoring. + +## Changes to `src/browser/index.ts` + +### 3.1 New Imports + +Add at the top of the file (after existing action imports): + +```typescript +import { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, +} from "./actions/deepResearch.js"; +``` + +### 3.2 Flow Modification Point: After Model/Thinking Selection + +**Current flow** (lines ~438-486): + +``` +1. Model selection (ensureModelSelection) +2. Thinking time selection (ensureThinkingTime) +3. Submit prompt (submitOnce) +4. Wait for response (waitForAssistantResponse) +``` + +**New flow** when `config.deepResearch === true`: + +``` +1. Model selection → SKIPPED (modelStrategy already set to "ignore" in config) +2. Thinking time → SKIPPED (Deep Research replaces thinking pill) +3. Activate Deep Research mode (NEW) +4. 
Submit prompt (submitOnce — unchanged) +5. Wait for research plan auto-confirm (NEW) +6. Wait for Deep Research completion (NEW — replaces waitForAssistantResponse) +``` + +### 3.3 Insert Deep Research Activation + +After the thinking time block (line ~486), add: + +```typescript +// Handle Deep Research activation if specified +const deepResearch = config.deepResearch ?? false; +if (deepResearch) { + await raceWithDisconnect( + withRetries( + () => activateDeepResearch(Runtime, Input, logger), + { + retries: 2, + delayMs: 500, + onRetry: (attempt, error) => { + if (options.verbose) { + logger( + `[retry] Deep Research activation attempt ${attempt + 1}: ${ + error instanceof Error ? error.message : error + }`, + ); + } + }, + }, + ), + ); + // Ensure prompt textarea is still ready after Deep Research activation + await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger)); + logger( + `Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`, + ); +} +``` + +### 3.4 Conditional Skip of Model Selection and Thinking Time + +Wrap the existing model selection and thinking time blocks: + +```typescript +// Model selection: skip for Deep Research (handled via pill activation) +if (!deepResearch) { + if (config.desiredModel && modelStrategy !== "ignore") { + // ... existing model selection code ... + } + // Thinking time: skip for Deep Research + if (thinkingTime) { + // ... existing thinking time code ... + } +} +``` + +Alternatively, this can be handled in `browserConfig.ts` by forcing `modelStrategy: "ignore"` and `thinkingTime: undefined` when `deepResearch` is true. 
+ +### 3.5 Replace Response Waiting for Deep Research + +The existing flow after prompt submission (simplified): + +```typescript +const { answerText, answerMarkdown, answerHtml, tookMs, answerTokens } = + await waitForAssistantResponseWithReload(...); +``` + +Add conditional Deep Research flow: + +```typescript +let answerText: string; +let answerMarkdown: string; +let answerHtml: string | undefined; +let tookMs: number; +let answerTokens: number; + +if (deepResearch) { + // Phase A: Wait for research plan auto-confirm + await raceWithDisconnect( + waitForResearchPlanAutoConfirm(Runtime, logger), + ); + + // Phase B: Monitor research completion (5-30 minutes) + const researchResult = await raceWithDisconnect( + waitForDeepResearchCompletion(Runtime, logger, config.timeoutMs), + ); + + // Phase C: Capture final markdown + const markdown = await raceWithDisconnect( + captureAssistantMarkdown(Runtime, researchResult.meta, logger), + ); + + answerText = researchResult.text; + answerMarkdown = markdown || researchResult.text; + answerHtml = researchResult.html; + tookMs = Date.now() - startedAt; + answerTokens = estimateTokenCount(answerText); +} else { + // ... existing normal response waiting flow ... +} +``` + +### 3.6 Update Session Header Message + +In `sessionRunner.ts` (line ~91), modify the timing hint: + +```typescript +const timingHint = deepResearch + ? "This Deep Research run can take 5-30 minutes."
+ : "This run can take up to an hour (usually ~10 minutes)."; +log(chalk.dim(timingHint)); +``` + +### 3.7 Error Handling Adjustments + +For Deep Research timeout errors, include reattach-friendly metadata: + +```typescript +if (deepResearch && error instanceof BrowserAutomationError) { + // Enrich error with session info for reattach + error.metadata = { + ...error.metadata, + deepResearch: true, + conversationUrl: lastUrl, + elapsedMs: Date.now() - startedAt, + }; +} +``` + +### 3.8 Update `pageActions.ts` Re-exports + +In `src/browser/pageActions.ts`, add re-exports: + +```typescript +export { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, +} from "./actions/deepResearch.js"; +``` + +## Flow Diagram + +``` +runBrowserMode(options) + │ + ├── connectChrome / syncCookies / navigate (unchanged) + │ + ├── if (!deepResearch): + │ ├── ensureModelSelection(...) + │ └── ensureThinkingTime(...) + │ + ├── if (deepResearch): + │ └── activateDeepResearch(Runtime, Input, logger) + │ + ├── ensurePromptReady(...) (unchanged) + │ + ├── submitOnce(prompt, attachments) (unchanged) + │ + ├── if (deepResearch): + │ ├── waitForResearchPlanAutoConfirm(...) ← NEW (wait ~70s) + │ ├── waitForDeepResearchCompletion(...) ← NEW (poll 5-30min) + │ └── captureAssistantMarkdown(...) ← EXISTING + │ + ├── if (!deepResearch): + │ └── waitForAssistantResponseWithReload(...) ← EXISTING + │ + └── return BrowserRunResult +``` + +## Interaction with Existing Features + +### Attachments + +File uploads happen BEFORE Deep Research activation, so no conflicts: +1. Upload files (existing `uploadAttachmentFile`) +2. Activate Deep Research pill +3. Submit prompt +4. Wait for research + +### Auto-Reattach + +The existing `autoReattachDelayMs` / `autoReattachIntervalMs` mechanism works for Deep Research, but the defaults should be longer. 
When `deepResearch` is true: +- `autoReattachDelayMs`: 120_000 (2 minutes, vs default 60s) +- `autoReattachIntervalMs`: 60_000 (1 minute checks) +- `autoReattachTimeoutMs`: 300_000 (5 minutes per attempt) + +### Heartbeat + +The existing heartbeat mechanism (`heartbeatIntervalMs`) keeps the session alive during long Deep Research runs. No changes needed. + +### Conversation URL Tracking + +The `/c/` URL appears immediately after prompt submission (same as normal chat). The existing `scheduleConversationHint` call works unchanged. diff --git a/docs/deep-research-plan/04-reattach-and-sessions.md b/docs/deep-research-plan/04-reattach-and-sessions.md new file mode 100644 index 00000000..0fbcc5ff --- /dev/null +++ b/docs/deep-research-plan/04-reattach-and-sessions.md @@ -0,0 +1,199 @@ +# Phase 4: Reattach & Session Support + +## Overview + +Deep Research runs take 5-30 minutes. If a run is interrupted (timeout, network issue, user Ctrl+C), the research continues in ChatGPT. Oracle's reattach mechanism should support reconnecting to in-progress Deep Research sessions and extracting results. + +## How Reattach Works Today + +Oracle's reattach flow (`src/browser/reattach.ts`): + +1. User runs `oracle session ` or auto-reattach triggers +2. Load session metadata (chrome port, conversation URL, target ID) +3. Reconnect to Chrome via CDP +4. Navigate to the conversation URL +5. Call `waitForAssistantResponse` with a short timeout +6. Extract the response text + +## Changes Needed + +### 4.1 `src/browser/reattach.ts` + +#### In `resumeBrowserSession` and `resumeBrowserSessionViaNewChrome`: + +When the session config has `deepResearch: true`: + +```typescript +if (sessionConfig.deepResearch) { + // Deep Research may still be running — use extended timeout + const deepResearchTimeout = sessionConfig.timeoutMs ?? 
DEEP_RESEARCH_DEFAULT_TIMEOUT_MS; + + // First check if research is already complete + const quickCheck = await checkDeepResearchStatus(Runtime, logger); + + if (quickCheck.completed) { + // Research finished while we were disconnected — extract result + logger("Deep Research already completed, extracting result..."); + return await extractDeepResearchResult(Runtime, logger); + } + + // Research still in progress — resume monitoring + logger(`Deep Research still in progress, resuming monitoring (timeout: ${Math.round(deepResearchTimeout / 60_000)}min)...`); + return await waitForDeepResearchCompletion(Runtime, logger, deepResearchTimeout); +} +``` + +### 4.2 New Helper: `checkDeepResearchStatus` + +Add to `src/browser/actions/deepResearch.ts`: + +```typescript +export async function checkDeepResearchStatus( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, +): Promise<{ + completed: boolean; + inProgress: boolean; + hasIframe: boolean; + textLength: number; +}> { + const { result } = await Runtime.evaluate({ + expression: `(() => { + const finishedBtns = document.querySelector(${JSON.stringify(FINISHED_ACTIONS_SELECTOR)}); + const stopBtn = document.querySelector(${JSON.stringify(STOP_BUTTON_SELECTOR)}); + const iframes = Array.from(document.querySelectorAll('iframe')).filter(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + const turns = document.querySelectorAll('[data-message-author-role="assistant"]'); + const lastTurn = turns[turns.length - 1]; + const textLength = (lastTurn?.textContent || '').length; + return { + completed: Boolean(finishedBtns), + inProgress: Boolean(stopBtn) || iframes.length > 0, + hasIframe: iframes.length > 0, + textLength, + }; + })()`, + returnByValue: true, + }); + + const val = result?.value as { + completed?: boolean; + inProgress?: boolean; + hasIframe?: boolean; + textLength?: number; + } | undefined; + + return { + completed: val?.completed ?? 
false, + inProgress: val?.inProgress ?? false, + hasIframe: val?.hasIframe ?? false, + textLength: val?.textLength ?? 0, + }; +} +``` + +### 4.3 Session Store: Persist Deep Research Flag + +In `src/sessionStore.ts`, ensure `deepResearch` is persisted with the session: + +```typescript +// When saving session +session.browserConfig.deepResearch = runOptions.deepResearch; + +// When loading session for reattach +const isDeepResearch = session.browserConfig?.deepResearch ?? false; +``` + +### 4.4 Auto-Reattach Adjustments + +In `src/browser/index.ts`, when building auto-reattach config for Deep Research: + +```typescript +if (deepResearch) { + // Override auto-reattach timing for Deep Research + effectiveAutoReattachDelay = Math.max( + config.autoReattachDelayMs ?? 0, + 120_000, // Wait at least 2 minutes before first reattach attempt + ); + effectiveAutoReattachInterval = Math.max( + config.autoReattachIntervalMs ?? 0, + 60_000, // Check every minute + ); + effectiveAutoReattachTimeout = Math.max( + config.autoReattachTimeoutMs ?? 0, + 300_000, // 5 minutes per attempt + ); +} +``` + +### 4.5 `oracle status` Command + +The `oracle status` command shows running sessions. For Deep Research sessions, display the mode: + +```typescript +const modeLabel = session.browserConfig?.deepResearch + ? " [Deep Research]" + : ""; +console.log(` ${session.slug}${modeLabel} — ${session.status} (${elapsed})`); +``` + +### 4.6 Zombie Session Detection + +In `src/browser/index.ts` or session management code, adjust zombie timeout for Deep Research: + +```typescript +// Deep Research sessions should not be considered zombies for at least 40 minutes +const effectiveZombieTimeout = session.browserConfig?.deepResearch + ? 
Math.max(zombieTimeoutMs, DEEP_RESEARCH_DEFAULT_TIMEOUT_MS) + : zombieTimeoutMs; +``` + +## Reattach Flow Diagram + +``` +oracle session + │ + ├── Load session metadata + │ └── deepResearch: true, conversationUrl: /c/xxx + │ + ├── Reconnect to Chrome (existing logic) + │ + ├── Navigate to conversation URL (existing logic) + │ + ├── checkDeepResearchStatus() + │ │ + │ ├── completed: true + │ │ └── extractDeepResearchResult() → return report + │ │ + │ ├── inProgress: true + │ │ └── waitForDeepResearchCompletion() → return report + │ │ + │ └── neither (error state) + │ └── Try normal waitForAssistantResponse as fallback + │ + └── Return result +``` + +## Edge Cases + +### Research completed but page not refreshed +- On reattach, the page is already at the conversation URL +- `FINISHED_ACTIONS_SELECTOR` should be visible immediately +- `checkDeepResearchStatus` handles this case + +### Chrome was closed during research +- Research continues server-side in ChatGPT +- On reattach, launch new Chrome, sync cookies, navigate to conversation URL +- Research result should be waiting (ChatGPT preserves it) + +### Multiple Deep Research sessions +- Oracle's session management already handles multiple sessions +- Each session has its own slug and conversation URL +- Reattach targets a specific session + +### Rate limits +- ChatGPT Plus has limited Deep Research uses per month +- If Deep Research is unavailable, `activateDeepResearch` will throw when the dropdown item is missing +- Error message should mention subscription tier requirements diff --git a/docs/deep-research-plan/05-testing.md b/docs/deep-research-plan/05-testing.md new file mode 100644 index 00000000..075c2f25 --- /dev/null +++ b/docs/deep-research-plan/05-testing.md @@ -0,0 +1,358 @@ +# Phase 5: Testing Strategy + +## Overview + +Testing follows Oracle's existing patterns: unit tests with mocked CDP, integration tests for full flows, and optional live tests against real Chrome + ChatGPT. 
+ +## Test Files + +### 5.1 Unit Tests: `tests/browser/deepResearch.test.ts` + +Following the pattern from `thinkingTime.test.ts`: + +```typescript +import { describe, it, expect, vi } from "vitest"; +import { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, + checkDeepResearchStatus, +} from "../../src/browser/actions/deepResearch.js"; + +// Mock CDP Runtime +const mockRuntime = { + evaluate: vi.fn(), +}; +const mockInput = {}; +const mockLogger = Object.assign(vi.fn(), { verbose: false, sessionLog: vi.fn() }); +``` + +#### Test Cases for `activateDeepResearch`: + +```typescript +describe("activateDeepResearch", () => { + it("activates Deep Research when all steps succeed", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "activated" } }, + }); + await expect( + activateDeepResearch(mockRuntime as any, mockInput as any, mockLogger), + ).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith("Deep Research mode activated"); + }); + + it("returns early when already active", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "already-active" } }, + }); + await expect( + activateDeepResearch(mockRuntime as any, mockInput as any, mockLogger), + ).resolves.toBeUndefined(); + }); + + it("throws when plus button is missing", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "plus-button-missing" } }, + }); + await expect( + activateDeepResearch(mockRuntime as any, mockInput as any, mockLogger), + ).rejects.toThrow(/composer plus button/); + }); + + it("throws with available options when Deep Research item missing", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { + status: "dropdown-item-missing", + available: ["Create image", "Web search", "Shopping research"], + }, + }, + }); + await expect( + activateDeepResearch(mockRuntime as any, mockInput as any, mockLogger), 
+ ).rejects.toThrow(/Deep research.*not found.*Available.*Create image/); + }); + + it("throws when pill does not confirm", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "pill-not-confirmed" } }, + }); + await expect( + activateDeepResearch(mockRuntime as any, mockInput as any, mockLogger), + ).rejects.toThrow(/pill did not appear/); + }); +}); +``` + +#### Test Cases for `waitForResearchPlanAutoConfirm`: + +```typescript +describe("waitForResearchPlanAutoConfirm", () => { + it("detects research plan and waits for auto-confirm", async () => { + // First poll: plan detected + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { hasResearchIframe: true, hasResearchText: false } }, + }); + // Subsequent polls: research started + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { hasLargeIframe: false, isResearching: true } }, + }); + + await expect( + waitForResearchPlanAutoConfirm(mockRuntime as any, mockLogger, 1000), + ).resolves.toBeUndefined(); + }); + + it("handles plan not detected gracefully", async () => { + // All polls: nothing detected + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { hasResearchIframe: false, hasResearchText: false } }, + }); + + await expect( + waitForResearchPlanAutoConfirm(mockRuntime as any, mockLogger, 100), + ).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith( + expect.stringContaining("not detected"), + ); + }); +}); +``` + +#### Test Cases for `waitForDeepResearchCompletion`: + +```typescript +describe("waitForDeepResearchCompletion", () => { + it("detects completion via finished actions", async () => { + // First poll: still in progress + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { finished: false, stopVisible: true, textLength: 100, hasIframe: true } }, + }); + // Second poll: completed + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { finished: true, stopVisible: false, textLength: 5000, 
hasIframe: false } }, + }); + // Extract result + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { text: "Research report...", turnId: "t1", messageId: "m1" } }, + }); + + const result = await waitForDeepResearchCompletion( + mockRuntime as any, mockLogger, 60_000, + ); + expect(result.text).toContain("Research report"); + }); + + it("throws on timeout with metadata", async () => { + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { finished: false, stopVisible: true, textLength: 500, hasIframe: true } }, + }); + + await expect( + waitForDeepResearchCompletion(mockRuntime as any, mockLogger, 500), + ).rejects.toThrow(/did not complete/); + }); +}); +``` + +#### Test Cases for `checkDeepResearchStatus`: + +```typescript +describe("checkDeepResearchStatus", () => { + it("reports completed when finished actions visible", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { completed: true, inProgress: false, hasIframe: false, textLength: 5000 } }, + }); + const status = await checkDeepResearchStatus(mockRuntime as any, mockLogger); + expect(status.completed).toBe(true); + expect(status.inProgress).toBe(false); + }); + + it("reports in-progress when iframe present", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { completed: false, inProgress: true, hasIframe: true, textLength: 0 } }, + }); + const status = await checkDeepResearchStatus(mockRuntime as any, mockLogger); + expect(status.completed).toBe(false); + expect(status.inProgress).toBe(true); + }); +}); +``` + +### 5.2 DOM Expression Tests: `tests/browser/deepResearchExpressions.test.ts` + +Test the generated DOM expressions in isolation (following `promptComposerExpressions.test.ts` pattern): + +```typescript +describe("Deep Research DOM expressions", () => { + it("buildActivateDeepResearchExpression generates valid JavaScript", () => { + const expr = buildActivateDeepResearchExpression(); + // Verify it contains expected 
selectors + expect(expr).toContain('composer-plus-btn'); + expect(expr).toContain('Deep research'); + expect(expr).toContain('data-radix-collection-item'); + }); +}); +``` + +### 5.3 Integration Test: `tests/browser/deepResearchFlow.test.ts` + +Full flow test with mocked CDP client: + +```typescript +describe("Deep Research full flow", () => { + it("completes end-to-end: activate, submit, plan, research, extract", async () => { + const mockClient = createMockCDPClient(); + + // Setup mock responses for each phase: + // 1. Activation succeeds + // 2. Prompt submission succeeds + // 3. Plan appears and auto-confirms + // 4. Research completes + // 5. Text extraction succeeds + + // Run the full browser mode flow with deepResearch: true + const result = await runBrowserMode({ + prompt: "Test research query", + config: { deepResearch: true, timeoutMs: 10_000 }, + log: mockLogger, + }); + + expect(result.answerText).toBeTruthy(); + expect(result.tookMs).toBeGreaterThan(0); + }); +}); +``` + +### 5.4 CLI Tests: `tests/cli/deepResearchConfig.test.ts` + +```typescript +describe("--deep-research CLI flag", () => { + it("forces browser engine", () => { + const config = buildBrowserConfig({ deepResearch: true }); + expect(config.deepResearch).toBe(true); + }); + + it("sets model strategy to ignore", () => { + const config = buildBrowserConfig({ deepResearch: true }); + expect(config.modelStrategy).toBe("ignore"); + }); + + it("sets extended default timeout", () => { + const config = buildBrowserConfig({ deepResearch: true }); + expect(config.timeoutMs).toBeGreaterThanOrEqual(2_400_000); + }); + + it("is mutually exclusive with --models", () => { + expect(() => { + validateOptions({ deepResearch: true, models: ["gpt-5.2", "gemini-3-pro"] }); + }).toThrow(); + }); +}); +``` + +### 5.5 Live Test: `tests/live/deep-research-live.test.ts` + +Following the pattern from `gemini-deep-think-live.test.ts`: + +```typescript +import { describe, it, expect } from "vitest"; + +const LIVE = 
process.env.ORACLE_LIVE_TEST === "1"; + +describe.skipIf(!LIVE)("Deep Research live", () => { + it( + "submits a research query and receives a report", + async () => { + // This test requires: + // - Chrome with active ChatGPT session + // - ChatGPT Plus or Pro subscription + // - ORACLE_LIVE_TEST=1 environment variable + + const result = await runBrowserMode({ + prompt: "What are the top 3 programming languages by GitHub usage in 2026? Brief summary only.", + config: { + deepResearch: true, + timeoutMs: 1_800_000, // 30 minutes + }, + log: console.log, + }); + + expect(result.answerText).toBeTruthy(); + expect(result.answerText.length).toBeGreaterThan(500); + expect(result.tookMs).toBeGreaterThan(60_000); + }, + 1_800_000, // 30 minute test timeout + ); +}); +``` + +### 5.6 Manual Test Checklist + +Append to `docs/manual-tests.md`: + +```markdown +## Deep Research (Browser) + +### Prerequisites +- Chrome signed into ChatGPT (Plus or Pro) +- Oracle installed globally or via npx + +### Test 1: Basic Deep Research +oracle --deep-research -p "Summarize the top 3 AI agent frameworks in 2026" -v + +Expected: +- Activates Deep Research pill in composer +- Submits prompt +- Research plan appears (cross-origin iframe) +- Auto-confirms after ~60 seconds +- Research runs for 5-15 minutes +- Final report extracted as markdown + +### Test 2: Deep Research with file context +oracle --deep-research -p "Analyze this project architecture" --file "src/**/*.ts" -v + +Expected: +- Files uploaded first +- Deep Research activated after upload +- Prompt includes file context + +### Test 3: Reattach to interrupted Deep Research +# Start, then Ctrl+C during research +oracle --deep-research -p "Comprehensive market analysis" --timeout 2m -v +# Reattach +oracle session + +Expected: +- Session saved with deepResearch flag +- Reattach detects Deep Research in progress +- Monitors until completion or timeout + +### Test 4: Error - Deep Research unavailable +# Test with a free-tier account
+oracle --deep-research -p "Test query" -v + +Expected: +- Clear error message about subscription requirement +- Lists available dropdown options + +### Test 5: Custom timeout +oracle --deep-research --timeout 60m -p "Very detailed research topic" -v + +Expected: +- Timeout set to 60 minutes +- Session does not expire prematurely +``` + +## Test Coverage Targets + +| Area | Coverage Goal | Method | +|------|--------------|--------| +| `activateDeepResearch` | All 5 status codes | Unit test | +| `waitForResearchPlanAutoConfirm` | Plan detected, plan missed, early confirm | Unit test | +| `waitForDeepResearchCompletion` | Completion, timeout, progress tracking | Unit test | +| `checkDeepResearchStatus` | All 3 states | Unit test | +| CLI flag parsing | Mutual exclusion, defaults, engine forcing | CLI test | +| Config propagation | All config layers | CLI test | +| Full flow | End-to-end with mocks | Integration test | +| Real ChatGPT | Actual Deep Research run | Live test (manual) | diff --git a/docs/manual-tests.md b/docs/manual-tests.md index 6cf5d6d2..9124c013 100644 --- a/docs/manual-tests.md +++ b/docs/manual-tests.md @@ -259,3 +259,64 @@ These Vitest cases hit the real OpenAI API to exercise both transports: Skip these unless you're intentionally validating the production API; they are fully gated behind `ORACLE_LIVE_TEST=1` to avoid accidental CI runs. 
+ +## Deep Research (Browser) + +### Prerequisites +- Chrome signed into ChatGPT (Plus or Pro) +- Oracle installed globally or via npx + +### Test 1: Basic Deep Research +```bash +oracle --deep-research -p "Summarize the top 3 AI agent frameworks in 2026" -v +``` + +Expected: +- Activates Deep Research pill in composer +- Submits prompt +- Research plan appears (cross-origin iframe) +- Auto-confirms after ~60 seconds +- Research runs for 5-15 minutes +- Final report extracted as markdown + +### Test 2: Deep Research with file context +```bash +oracle --deep-research -p "Analyze this project architecture" --file "src/**/*.ts" -v +``` + +Expected: +- Files uploaded first +- Deep Research activated after upload +- Prompt includes file context + +### Test 3: Reattach to interrupted Deep Research +```bash +# Start, then Ctrl+C during research +oracle --deep-research -p "Comprehensive market analysis" --timeout 2m -v +# Reattach +oracle session +``` + +Expected: +- Session saved with deepResearch flag +- Reattach detects Deep Research in progress +- Monitors until completion or timeout + +### Test 4: Error - Deep Research unavailable +```bash +# Test with a free-tier account +oracle --deep-research -p "Test query" -v +``` + +Expected: +- Clear error message about subscription requirement +- Lists available dropdown options + +### Test 5: Custom timeout +```bash +oracle --deep-research --timeout 60m -p "Very detailed research topic" -v +``` + +Expected: +- Timeout set to 60 minutes +- Session does not expire prematurely diff --git a/src/browser/actions/deepResearch.ts b/src/browser/actions/deepResearch.ts new file mode 100644 index 00000000..edfde0ca --- /dev/null +++ b/src/browser/actions/deepResearch.ts @@ -0,0 +1,414 @@ +import type { ChromeClient, BrowserLogger } from "../types.js"; +import { + DEEP_RESEARCH_PLUS_BUTTON, + DEEP_RESEARCH_DROPDOWN_ITEM_TEXT, + DEEP_RESEARCH_PILL_LABEL, + DEEP_RESEARCH_POLL_INTERVAL_MS, + DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS, + 
DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, + FINISHED_ACTIONS_SELECTOR, + STOP_BUTTON_SELECTOR, +} from "../constants.js"; +import { delay } from "../utils.js"; +import { buildClickDispatcher } from "./domEvents.js"; +import { captureAssistantMarkdown, readAssistantSnapshot } from "./assistantResponse.js"; +import { BrowserAutomationError } from "../../oracle/errors.js"; + +type ActivateOutcome = + | { status: "activated" } + | { status: "already-active" } + | { status: "plus-button-missing" } + | { status: "dropdown-item-missing"; available?: string[] } + | { status: "pill-not-confirmed" }; + +/** + * Activates Deep Research mode by clicking the "+" button and selecting + * "Deep research" from the radix dropdown menu. + */ +export async function activateDeepResearch( + Runtime: ChromeClient["Runtime"], + _Input: ChromeClient["Input"], + logger: BrowserLogger, +): Promise<void> { + const expression = buildActivateDeepResearchExpression(); + const outcome = await Runtime.evaluate({ + expression, + awaitPromise: true, + returnByValue: true, + }); + const result = outcome.result?.value as ActivateOutcome | undefined; + + switch (result?.status) { + case "activated": + logger("Deep Research mode activated"); + return; + case "already-active": + logger("Deep Research mode already active"); + return; + case "plus-button-missing": + throw new BrowserAutomationError( + "Could not find the composer plus button to activate Deep Research.", + { stage: "deep-research-activate", code: "plus-button-missing" }, + ); + case "dropdown-item-missing": { + const hint = result.available?.length + ?
` Available options: ${result.available.join(", ")}` + : ""; + throw new BrowserAutomationError( + `"Deep research" option not found in composer dropdown.${hint} ` + + "This feature may require a ChatGPT Plus or Pro subscription.", + { stage: "deep-research-activate", code: "dropdown-item-missing" }, + ); + } + case "pill-not-confirmed": + throw new BrowserAutomationError( + "Deep Research pill did not appear after selection. The UI may have changed.", + { stage: "deep-research-activate", code: "pill-not-confirmed" }, + ); + default: + throw new BrowserAutomationError("Unexpected result from Deep Research activation.", { + stage: "deep-research-activate", + }); + } +} + +/** + * After prompt submission, waits for the research plan to appear and + * auto-confirm (~60s countdown + 10s safety margin). + */ +export async function waitForResearchPlanAutoConfirm( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + autoConfirmWaitMs: number = DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS, +): Promise<void> { + // Phase A: Detect research plan appearance (up to 60s) + const planDeadline = Date.now() + 60_000; + let planDetected = false; + + while (Date.now() < planDeadline) { + const { result } = await Runtime.evaluate({ + expression: `(() => { + const iframes = document.querySelectorAll('iframe'); + const hasResearchIframe = Array.from(iframes).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + const assistantText = (document.querySelector('[data-message-author-role="assistant"]')?.textContent || '').toLowerCase(); + const hasResearchText = assistantText.includes('researching') || + assistantText.includes('research plan') || + assistantText.includes('survey') || + assistantText.includes('analyze'); + return { hasResearchIframe, hasResearchText }; + })()`, + returnByValue: true, + }); + + const val = result?.value as + | { hasResearchIframe?: boolean; hasResearchText?: boolean } + | undefined; + if
(val?.hasResearchIframe || val?.hasResearchText) { + planDetected = true; + logger("Research plan detected, waiting for auto-confirm countdown..."); + break; + } + await delay(2_000); + } + + if (!planDetected) { + logger( + "Warning: Research plan not detected within 60s; continuing (may have auto-confirmed already)", + ); + return; + } + + // Phase B: Wait for auto-confirm countdown + const confirmStart = Date.now(); + while (Date.now() - confirmStart < autoConfirmWaitMs) { + const { result } = await Runtime.evaluate({ + expression: `(() => { + const iframes = document.querySelectorAll('iframe'); + const hasLargeIframe = Array.from(iframes).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + const text = (document.body?.innerText || '').toLowerCase(); + const isResearching = text.includes('researching...') || + text.includes('reading sources') || + text.includes('considering'); + return { hasLargeIframe, isResearching }; + })()`, + returnByValue: true, + }); + const val = result?.value as + | { hasLargeIframe?: boolean; isResearching?: boolean } + | undefined; + + if (val?.isResearching) { + logger("Research plan confirmed, execution started"); + return; + } + + await delay(5_000); + } + + logger("Auto-confirm wait complete, proceeding to monitor research progress"); +} + +/** + * Polls for Deep Research completion over 5-30+ minutes. + * Returns the full response text, optional HTML, and turn metadata. 
+ */ +export async function waitForDeepResearchCompletion( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, + timeoutMs: number = DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, +): Promise<{ + text: string; + html?: string; + meta: { turnId?: string | null; messageId?: string | null }; +}> { + const start = Date.now(); + let lastLogTime = start; + let lastTextLength = 0; + const finishedSelector = JSON.stringify(FINISHED_ACTIONS_SELECTOR); + const stopSelector = JSON.stringify(STOP_BUTTON_SELECTOR); + + logger(`Monitoring Deep Research (timeout: ${Math.round(timeoutMs / 60_000)}min)...`); + + while (Date.now() - start < timeoutMs) { + const { result } = await Runtime.evaluate({ + expression: `(() => { + const finished = Boolean(document.querySelector(${finishedSelector})); + const stopVisible = Boolean(document.querySelector(${stopSelector})); + const turns = document.querySelectorAll('[data-message-author-role="assistant"]'); + const lastTurn = turns[turns.length - 1]; + const textLength = (lastTurn?.textContent || '').length; + const hasIframe = Array.from(document.querySelectorAll('iframe')).some(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + return { finished, stopVisible, textLength, hasIframe }; + })()`, + returnByValue: true, + }); + + const val = result?.value as + | { + finished?: boolean; + stopVisible?: boolean; + textLength?: number; + hasIframe?: boolean; + } + | undefined; + + // Completion detected + if (val?.finished) { + logger(`Deep Research completed (${Math.round((Date.now() - start) / 1000)}s elapsed)`); + return await extractDeepResearchResult(Runtime, logger); + } + + // Progress logging every 60 seconds + const now = Date.now(); + if (now - lastLogTime >= 60_000) { + const elapsed = Math.round((now - start) / 1000); + const chars = val?.textLength ?? 0; + const phase = val?.hasIframe ? "researching" : val?.stopVisible ? "generating" : "waiting"; + logger(`Deep Research ${phase}... 
${elapsed}s elapsed, ~${chars} chars`); + lastLogTime = now; + } + + lastTextLength = val?.textLength ?? lastTextLength; + await delay(DEEP_RESEARCH_POLL_INTERVAL_MS); + } + + // Timeout — throw with metadata for potential reattach + const elapsed = Math.round((Date.now() - start) / 1000); + throw new BrowserAutomationError( + `Deep Research did not complete within ${Math.round(timeoutMs / 60_000)} minutes (${elapsed}s elapsed). ` + + "Use 'oracle session ' to reattach later, or increase --timeout.", + { + stage: "deep-research-timeout", + code: "deep-research-timeout", + elapsedMs: Date.now() - start, + lastTextLength, + }, + ); +} + +/** + * Extracts the Deep Research result using existing assistant response + * extraction logic (readAssistantSnapshot + captureAssistantMarkdown). + */ +export async function extractDeepResearchResult( + Runtime: ChromeClient["Runtime"], + logger: BrowserLogger, +): Promise<{ + text: string; + html?: string; + meta: { turnId?: string | null; messageId?: string | null }; +}> { + const snapshot = await readAssistantSnapshot(Runtime); + const meta = { + turnId: snapshot?.turnId ?? null, + messageId: snapshot?.messageId ?? null, + }; + + // Try the copy-button approach first for clean markdown + const markdown = await captureAssistantMarkdown(Runtime, meta, logger); + if (markdown) { + return { text: markdown, html: snapshot?.html ?? undefined, meta }; + } + + // Fall back to snapshot text + if (snapshot?.text) { + return { text: snapshot.text, html: snapshot.html ?? undefined, meta }; + } + + throw new BrowserAutomationError( + "Deep Research completed but failed to extract the response text.", + { stage: "deep-research-extract", code: "extraction-failed" }, + ); +} + +/** + * Quick status check for Deep Research — used during reattach to determine + * whether research has completed, is still in progress, or is in an unknown state. 
+ */ +export async function checkDeepResearchStatus( + Runtime: ChromeClient["Runtime"], + _logger: BrowserLogger, +): Promise<{ + completed: boolean; + inProgress: boolean; + hasIframe: boolean; + textLength: number; +}> { + const finishedSelector = JSON.stringify(FINISHED_ACTIONS_SELECTOR); + const stopSelector = JSON.stringify(STOP_BUTTON_SELECTOR); + + const { result } = await Runtime.evaluate({ + expression: `(() => { + const finished = Boolean(document.querySelector(${finishedSelector})); + const stopVisible = Boolean(document.querySelector(${stopSelector})); + const iframes = Array.from(document.querySelectorAll('iframe')).filter(f => { + const rect = f.getBoundingClientRect(); + return rect.width > 200 && rect.height > 200; + }); + const turns = document.querySelectorAll('[data-message-author-role="assistant"]'); + const lastTurn = turns[turns.length - 1]; + const textLength = (lastTurn?.textContent || '').length; + return { + completed: finished, + inProgress: stopVisible || iframes.length > 0, + hasIframe: iframes.length > 0, + textLength, + }; + })()`, + returnByValue: true, + }); + + const val = result?.value as + | { + completed?: boolean; + inProgress?: boolean; + hasIframe?: boolean; + textLength?: number; + } + | undefined; + + return { + completed: val?.completed ?? false, + inProgress: val?.inProgress ?? false, + hasIframe: val?.hasIframe ?? false, + textLength: val?.textLength ?? 
0, + }; +} + +// --------------------------------------------------------------------------- +// DOM expression builder +// --------------------------------------------------------------------------- + +function buildActivateDeepResearchExpression(): string { + const plusBtnSelector = JSON.stringify(DEEP_RESEARCH_PLUS_BUTTON); + const targetText = JSON.stringify(DEEP_RESEARCH_DROPDOWN_ITEM_TEXT); + const pillLabel = JSON.stringify(DEEP_RESEARCH_PILL_LABEL); + + // pillLabel is used inside the expression for verification + void pillLabel; + + return `(async () => { + ${buildClickDispatcher()} + + // Step 0: Check if already active + const existingPill = document.querySelector('.__composer-pill-composite'); + if (existingPill) { + const pillText = existingPill.textContent?.trim() || ''; + const pillAria = existingPill.querySelector('button')?.getAttribute('aria-label') || ''; + if (pillText.toLowerCase().includes('deep research') || + pillAria.toLowerCase().includes('deep research')) { + return { status: 'already-active' }; + } + } + + // Step 1: Click plus button + const plusBtn = document.querySelector(${plusBtnSelector}) || + Array.from(document.querySelectorAll('button')).find( + b => (b.getAttribute('aria-label') || '').toLowerCase().includes('add files') + ); + if (!plusBtn) return { status: 'plus-button-missing' }; + dispatchClickSequence(plusBtn); + + // Step 2: Wait for dropdown + const waitForDropdown = () => new Promise((resolve) => { + let elapsed = 0; + const tick = () => { + const items = document.querySelectorAll('[data-radix-collection-item]'); + if (items.length > 0) { resolve(items); return; } + elapsed += 150; + if (elapsed > 3000) { resolve(null); return; } + setTimeout(tick, 150); + }; + setTimeout(tick, 150); + }); + const items = await waitForDropdown(); + if (!items) return { status: 'dropdown-item-missing', available: [] }; + + // Step 3: Find "Deep research" item + const target = ${targetText}.toLowerCase(); + let match = null; + const 
available = []; + for (const item of items) { + const text = (item.textContent || '').trim(); + available.push(text); + if (text.toLowerCase() === target) { + match = item; + } + } + if (!match) return { status: 'dropdown-item-missing', available }; + + // Step 4: Click it + dispatchClickSequence(match); + + // Step 5: Verify pill appeared + const waitForPill = () => new Promise((resolve) => { + let elapsed = 0; + const tick = () => { + const pills = document.querySelectorAll('.__composer-pill-composite'); + for (const pill of pills) { + const text = pill.textContent?.trim() || ''; + const aria = pill.querySelector('button')?.getAttribute('aria-label') || ''; + if (text.toLowerCase().includes('deep research') || + aria.toLowerCase().includes('deep research')) { + resolve(true); return; + } + } + elapsed += 200; + if (elapsed > 5000) { resolve(false); return; } + setTimeout(tick, 200); + }; + setTimeout(tick, 200); + }); + const pillConfirmed = await waitForPill(); + return pillConfirmed ? { status: 'activated' } : { status: 'pill-not-confirmed' }; + })()`; +} diff --git a/src/browser/config.ts b/src/browser/config.ts index a0c278e2..92907c97 100644 --- a/src/browser/config.ts +++ b/src/browser/config.ts @@ -1,4 +1,9 @@ -import { CHATGPT_URL, DEFAULT_MODEL_STRATEGY, DEFAULT_MODEL_TARGET } from "./constants.js"; +import { + CHATGPT_URL, + DEFAULT_MODEL_STRATEGY, + DEFAULT_MODEL_TARGET, + DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, +} from "./constants.js"; import { normalizeBrowserModelStrategy } from "./modelStrategy.js"; import type { BrowserAutomationConfig, ResolvedBrowserConfig } from "./types.js"; import { isTemporaryChatUrl, normalizeChatgptUrl } from "./utils.js"; @@ -82,7 +87,9 @@ export function resolveBrowserConfig( ...config, url: normalizedUrl, chatgptUrl: normalizedUrl, - timeoutMs: config?.timeoutMs ?? DEFAULT_BROWSER_CONFIG.timeoutMs, + timeoutMs: + config?.timeoutMs ?? + (config?.deepResearch ? 
DEEP_RESEARCH_DEFAULT_TIMEOUT_MS : DEFAULT_BROWSER_CONFIG.timeoutMs), debugPort: config?.debugPort ?? debugPortEnv ?? DEFAULT_BROWSER_CONFIG.debugPort, inputTimeoutMs: config?.inputTimeoutMs ?? DEFAULT_BROWSER_CONFIG.inputTimeoutMs, assistantRecheckDelayMs: @@ -114,6 +121,7 @@ export function resolveBrowserConfig( allowCookieErrors: config?.allowCookieErrors ?? envAllowCookieErrors ?? DEFAULT_BROWSER_CONFIG.allowCookieErrors, thinkingTime: config?.thinkingTime, + deepResearch: config?.deepResearch, manualLogin, manualLoginProfileDir: manualLogin ? resolvedProfileDir : null, manualLoginCookieSync: diff --git a/src/browser/constants.ts b/src/browser/constants.ts index e2e417a7..a37f0dbe 100644 --- a/src/browser/constants.ts +++ b/src/browser/constants.ts @@ -81,5 +81,16 @@ export const SEND_BUTTON_SELECTOR = SEND_BUTTON_SELECTORS[0]; export const MODEL_BUTTON_SELECTOR = '[data-testid="model-switcher-dropdown-button"]'; export const COPY_BUTTON_SELECTOR = 'button[data-testid="copy-turn-action-button"]'; // Action buttons that only appear once a turn has finished rendering. +// Deep Research selectors and constants +export const DEEP_RESEARCH_PLUS_BUTTON = '[data-testid="composer-plus-btn"]'; +export const DEEP_RESEARCH_DROPDOWN_ITEM_TEXT = "Deep research"; +export const DEEP_RESEARCH_PILL_LABEL = "Deep research"; +/** Polling interval for Deep Research completion (5 seconds). */ +export const DEEP_RESEARCH_POLL_INTERVAL_MS = 5_000; +/** Auto-confirm wait time (countdown ~60s + 10s safety margin). */ +export const DEEP_RESEARCH_AUTO_CONFIRM_WAIT_MS = 70_000; +/** Default timeout for Deep Research completion (40 minutes). 
*/ +export const DEEP_RESEARCH_DEFAULT_TIMEOUT_MS = 2_400_000; + export const FINISHED_ACTIONS_SELECTOR = 'button[data-testid="copy-turn-action-button"], button[data-testid="good-response-turn-action-button"], button[data-testid="bad-response-turn-action-button"], button[aria-label="Share"]'; diff --git a/src/browser/index.ts b/src/browser/index.ts index b89cd19e..db7191eb 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -40,6 +40,11 @@ import { import { INPUT_SELECTORS } from "./constants.js"; import { uploadAttachmentViaDataTransfer } from "./actions/remoteFileTransfer.js"; import { ensureThinkingTime } from "./actions/thinkingTime.js"; +import { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, +} from "./actions/deepResearch.js"; import { estimateTokenCount, withRetries, delay } from "./utils.js"; import { formatElapsed } from "../oracle/format.js"; import { CHATGPT_URL, CONVERSATION_TURN_SELECTOR, DEFAULT_MODEL_STRATEGY } from "./constants.js"; @@ -467,9 +472,10 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise ensureThinkingTime(Runtime, thinkingTime, logger), { retries: 2, @@ -484,6 +490,26 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise activateDeepResearch(Runtime, Input, logger), { + retries: 2, + delayMs: 500, + onRetry: (attempt, error) => { + if (options.verbose) { + logger( + `[retry] Deep Research activation attempt ${attempt + 1}: ${error instanceof Error ? error.message : error}`, + ); + } + }, + }), + ); + await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger)); + logger( + `Prompt textarea ready (after Deep Research activation, ${promptText.length.toLocaleString()} chars queued)`, + ); + } const profileLockTimeoutMs = manualLogin ? (config.profileLockTimeoutMs ?? 
0) : 0; let profileLock: ProfileRunLock | null = null; const acquireProfileLockIfNeeded = async () => { @@ -587,9 +613,12 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise false); + await captureRuntimeSnapshot().catch(() => undefined); + const runtime = { + chromePid: chrome.pid, + chromePort: chrome.port, + chromeHost, + userDataDir, + chromeTargetId: lastTargetId, + tabUrl: lastUrl, + conversationId: lastUrl ? extractConversationIdFromUrl(lastUrl) : undefined, + controllerPid: process.pid, + }; + if (error instanceof BrowserAutomationError) { + throw new BrowserAutomationError(error.message, { + ...(error.details as object), + stage: "deep-research-timeout", + runtime, + deepResearch: true, + }, error); + } + throw error; + } + } + stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false); // Helper to normalize text for echo detection (collapse whitespace, lowercase) const normalizeForComparison = (text: string): string => diff --git a/src/browser/pageActions.ts b/src/browser/pageActions.ts index 850470de..478baf2b 100644 --- a/src/browser/pageActions.ts +++ b/src/browser/pageActions.ts @@ -23,3 +23,10 @@ export { buildMarkdownFallbackExtractorForTest, buildCopyExpressionForTest, } from "./actions/assistantResponse.js"; +export { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, + checkDeepResearchStatus, + extractDeepResearchResult, +} from "./actions/deepResearch.js"; diff --git a/src/browser/reattach.ts b/src/browser/reattach.ts index 1c683f20..adeb9742 100644 --- a/src/browser/reattach.ts +++ b/src/browser/reattach.ts @@ -10,7 +10,11 @@ import { ensureNotBlocked, ensureLoggedIn, ensurePromptReady, + checkDeepResearchStatus, + waitForDeepResearchCompletion, + extractDeepResearchResult, } from "./pageActions.js"; +import { DEEP_RESEARCH_DEFAULT_TIMEOUT_MS } from "./constants.js"; import type { BrowserLogger, ChromeClient } from "./types.js"; import { 
launchChrome, connectToChrome, hideChromeWindow } from "./chromeLifecycle.js"; import { resolveBrowserConfig } from "./config.js"; @@ -127,6 +131,48 @@ export async function resumeBrowserSession( "Reattach target did not respond", ); await ensureConversationOpen(); + + // Deep Research reattach: check status and either extract or resume monitoring + if (config?.deepResearch) { + const deepResearchTimeout = config.timeoutMs ?? DEEP_RESEARCH_DEFAULT_TIMEOUT_MS; + const status = await checkDeepResearchStatus(Runtime, logger); + + if (status.completed) { + logger("Deep Research already completed, extracting result..."); + const drResult = await extractDeepResearchResult(Runtime, logger); + if (client && typeof client.close === "function") { + try { + await client.close(); + } catch { + // ignore + } + } + return { answerText: drResult.text, answerMarkdown: drResult.text }; + } + + if (status.inProgress) { + logger( + `Deep Research still in progress, resuming monitoring (timeout: ${Math.round(deepResearchTimeout / 60_000)}min)...`, + ); + const drResult = await waitForDeepResearchCompletion( + Runtime, + logger, + deepResearchTimeout, + ); + if (client && typeof client.close === "function") { + try { + await client.close(); + } catch { + // ignore + } + } + return { answerText: drResult.text, answerMarkdown: drResult.text }; + } + + // Neither completed nor in-progress — fall through to normal response waiting + logger("Deep Research status unclear, falling back to normal response extraction..."); + } + const minTurnIndex = await readConversationTurnIndex(Runtime, logger); const promptEcho = buildPromptEchoMatcher(deps.promptPreview); const answer = await withTimeout( @@ -244,6 +290,50 @@ async function resumeBrowserSessionViaNewChrome( await waitForLocationChange(Runtime, 15_000); } + // Deep Research reattach via new Chrome + if (config?.deepResearch) { + const deepResearchTimeout = resolved.timeoutMs ?? 
DEEP_RESEARCH_DEFAULT_TIMEOUT_MS; + const status = await checkDeepResearchStatus(Runtime, logger); + + let drResult: { text: string } | undefined; + if (status.completed) { + logger("Deep Research already completed, extracting result..."); + drResult = await extractDeepResearchResult(Runtime, logger); + } else if (status.inProgress) { + logger( + `Deep Research still in progress, resuming monitoring (timeout: ${Math.round(deepResearchTimeout / 60_000)}min)...`, + ); + drResult = await waitForDeepResearchCompletion(Runtime, logger, deepResearchTimeout); + } else { + logger("Deep Research status unclear, falling back to normal response extraction..."); + } + + if (drResult) { + if (client && typeof client.close === "function") { + try { + await client.close(); + } catch { + // ignore + } + } + if (!resolved.keepBrowser) { + try { + await chrome.kill(); + } catch { + // ignore + } + if (manualLogin) { + await cleanupStaleProfileState(userDataDir, logger, { + lockRemovalMode: "never", + }).catch(() => undefined); + } else { + await rm(userDataDir, { recursive: true, force: true }).catch(() => undefined); + } + } + return { answerText: drResult.text, answerMarkdown: drResult.text }; + } + } + const waitForResponse = deps.waitForAssistantResponse ?? waitForAssistantResponse; const captureMarkdown = deps.captureAssistantMarkdown ?? captureAssistantMarkdown; const timeoutMs = resolved.timeoutMs ?? 120_000; diff --git a/src/browser/sessionRunner.ts b/src/browser/sessionRunner.ts index 48e5ebd2..7bdf2341 100644 --- a/src/browser/sessionRunner.ts +++ b/src/browser/sessionRunner.ts @@ -88,7 +88,10 @@ export async function runBrowserSessionExecution( automationLogger.sessionLog = runOptions.verbose ? log : () => {}; log(headerLine); - log(chalk.dim("This run can take up to an hour (usually ~10 minutes).")); + const timingHint = browserConfig.deepResearch + ? "This Deep Research run can take 5-30 minutes." 
+ : "This run can take up to an hour (usually ~10 minutes)."; + log(chalk.dim(timingHint)); if (runOptions.verbose) { log(chalk.dim("Chrome automation does not stream output; this may take a minute...")); } diff --git a/src/browser/types.ts b/src/browser/types.ts index 7c514672..a7cfdfaf 100644 --- a/src/browser/types.ts +++ b/src/browser/types.ts @@ -59,6 +59,8 @@ export interface BrowserAutomationConfig { manualLoginCookieSync?: boolean; /** Thinking time intensity level for Thinking/Pro models: light, standard, extended, heavy */ thinkingTime?: ThinkingTimeLevel; + /** Enable ChatGPT Deep Research mode (browser engine only). */ + deepResearch?: boolean; } export interface BrowserRunOptions { @@ -73,6 +75,8 @@ export interface BrowserRunOptions { log?: BrowserLogger; heartbeatIntervalMs?: number; verbose?: boolean; + /** Whether this is a Deep Research run (affects timeouts and response detection). */ + deepResearch?: boolean; /** Optional hook to persist runtime info (port/url/target) as soon as Chrome is ready. */ runtimeHintCb?: (hint: BrowserRuntimeMetadata) => void | Promise; } @@ -102,6 +106,7 @@ export type ResolvedBrowserConfig = Required< | "desiredModel" | "remoteChrome" | "thinkingTime" + | "deepResearch" | "modelStrategy" > > & { @@ -111,6 +116,7 @@ export type ResolvedBrowserConfig = Required< desiredModel?: string | null; modelStrategy?: BrowserModelStrategy; thinkingTime?: ThinkingTimeLevel; + deepResearch?: boolean; debugPort?: number | null; inlineCookiesSource?: string | null; remoteChrome?: { host: string; port: number } | null; diff --git a/src/cli/browserConfig.ts b/src/cli/browserConfig.ts index ad2919c4..f84b33ed 100644 --- a/src/cli/browserConfig.ts +++ b/src/cli/browserConfig.ts @@ -66,6 +66,8 @@ export interface BrowserFlagOptions { browserManualLoginProfileDir?: string | null; /** Thinking time intensity: 'light', 'standard', 'extended', 'heavy' */ browserThinkingTime?: ThinkingTimeLevel; + /** Enable ChatGPT Deep Research mode. 
*/ + deepResearch?: boolean; browserModelLabel?: string; browserModelStrategy?: BrowserModelStrategy; browserAllowCookieErrors?: boolean; @@ -113,8 +115,9 @@ export async function buildBrowserConfig( const isChatGptModel = baseModel.startsWith("gpt-") && !baseModel.includes("codex"); const shouldUseOverride = !isChatGptModel && normalizedOverride.length > 0 && normalizedOverride !== baseModel; - const modelStrategy = - normalizeBrowserModelStrategy(options.browserModelStrategy) ?? DEFAULT_MODEL_STRATEGY; + const modelStrategy = options.deepResearch + ? "ignore" + : (normalizeBrowserModelStrategy(options.browserModelStrategy) ?? DEFAULT_MODEL_STRATEGY); const cookieNames = parseCookieNames( options.browserCookieNames ?? process.env.ORACLE_BROWSER_COOKIE_NAMES, ); @@ -206,6 +209,7 @@ export async function buildBrowserConfig( allowCookieErrors: options.browserAllowCookieErrors ?? true, remoteChrome, thinkingTime: options.browserThinkingTime, + deepResearch: options.deepResearch ?? undefined, }; } diff --git a/src/cli/browserDefaults.ts b/src/cli/browserDefaults.ts index af854e77..c99bedf5 100644 --- a/src/cli/browserDefaults.ts +++ b/src/cli/browserDefaults.ts @@ -25,6 +25,7 @@ export interface BrowserDefaultsOptions { browserKeepBrowser?: boolean; browserModelStrategy?: BrowserModelStrategy; browserThinkingTime?: ThinkingTimeLevel; + deepResearch?: boolean; browserManualLogin?: boolean; browserManualLoginProfileDir?: string | null; } @@ -113,6 +114,9 @@ export function applyBrowserDefaultsFromConfig( if (isUnset("browserThinkingTime") && browser.thinkingTime !== undefined) { options.browserThinkingTime = browser.thinkingTime; } + if (isUnset("deepResearch") && browser.deepResearch !== undefined) { + options.deepResearch = browser.deepResearch; + } if (isUnset("browserManualLogin") && browser.manualLogin !== undefined) { options.browserManualLogin = browser.manualLogin; } diff --git a/src/cli/sessionRunner.ts b/src/cli/sessionRunner.ts index 261f96d9..675e715d 100644 
--- a/src/cli/sessionRunner.ts +++ b/src/cli/sessionRunner.ts @@ -634,15 +634,24 @@ async function autoReattachUntilComplete({ log(dim("Auto-reattach disabled: missing runtime or browser config.")); return false; } - const delayMs = Math.max(0, browserConfig.autoReattachDelayMs ?? 0); - const intervalMs = Math.max(0, browserConfig.autoReattachIntervalMs ?? 0); + const isDeepResearch = browserConfig.deepResearch === true; + const delayMs = Math.max( + browserConfig.autoReattachDelayMs ?? 0, + isDeepResearch ? 120_000 : 0, // Deep Research: wait at least 2 min before first attempt + ); + const intervalMs = Math.max( + browserConfig.autoReattachIntervalMs ?? 0, + isDeepResearch ? 60_000 : 0, // Deep Research: check every minute at minimum + ); if (intervalMs <= 0) { return false; } - const timeoutMs = + const timeoutMs = Math.max( Math.max(0, browserConfig.autoReattachTimeoutMs ?? 0) || - Math.max(0, browserConfig.timeoutMs ?? 0) || - 120_000; + Math.max(0, browserConfig.timeoutMs ?? 0) || + 120_000, + isDeepResearch ? 300_000 : 0, // Deep Research: 5 min per attempt at minimum + ); const maxTotalMs = 2 * 60 * 60 * 1000; // 2h hard cap; avoid infinite polling by default. const maxDeadline = Date.now() + maxTotalMs; diff --git a/src/cli/sessionTable.ts b/src/cli/sessionTable.ts index 6b0ef2f0..4c93c3e5 100644 --- a/src/cli/sessionTable.ts +++ b/src/cli/sessionTable.ts @@ -35,7 +35,10 @@ export function formatSessionTableRow( const status = colorStatus(meta.status ?? "unknown", rich); const modelLabel = (meta.model ?? "n/a").padEnd(MODEL_PAD); const model = rich ? chalk.white(modelLabel) : modelLabel; - const modeLabel = (meta.mode ?? meta.options?.mode ?? "api").padEnd(MODE_PAD); + const modeRaw = meta.mode ?? meta.options?.mode ?? "api"; + const modeLabel = ( + modeRaw === "browser" && meta.browser?.config?.deepResearch ? "browser/dr" : modeRaw + ).padEnd(MODE_PAD); const mode = rich ? 
chalk.gray(modeLabel) : modeLabel; const timestampLabel = formatTimestampAligned(meta.createdAt).padEnd(TIMESTAMP_PAD); const timestamp = rich ? chalk.gray(timestampLabel) : timestampLabel; diff --git a/src/config.ts b/src/config.ts index 5963ef26..818da02d 100644 --- a/src/config.ts +++ b/src/config.ts @@ -49,6 +49,8 @@ export interface BrowserConfigDefaults { modelStrategy?: BrowserModelStrategy; /** Thinking time intensity (ChatGPT Thinking/Pro models): 'light', 'standard', 'extended', 'heavy' */ thinkingTime?: ThinkingTimeLevel; + /** Enable ChatGPT Deep Research mode (browser engine only). */ + deepResearch?: boolean; /** Skip cookie sync and reuse a persistent automation profile (waits for manual ChatGPT login). */ manualLogin?: boolean; /** Manual-login profile directory override (also available via ORACLE_BROWSER_PROFILE_DIR). */ diff --git a/src/sessionManager.ts b/src/sessionManager.ts index 3f6500ce..d7ee586d 100644 --- a/src/sessionManager.ts +++ b/src/sessionManager.ts @@ -57,6 +57,8 @@ export interface BrowserSessionConfig { manualLoginCookieSync?: boolean; /** Thinking time intensity: 'light', 'standard', 'extended', 'heavy' */ thinkingTime?: ThinkingTimeLevel; + /** Enable ChatGPT Deep Research mode. 
*/ + deepResearch?: boolean; } export interface BrowserRuntimeMetadata { @@ -849,6 +851,11 @@ function resolveZombieMaxAgeMs(meta: SessionMetadata): number { maxAgeMs = timeoutMs; } } + // Deep Research sessions run 5-30+ minutes; extend zombie threshold + if (meta.browser?.config?.deepResearch) { + const deepResearchMinMs = 2_400_000; // 40 minutes — matches DEEP_RESEARCH_DEFAULT_TIMEOUT_MS + maxAgeMs = Math.max(maxAgeMs, deepResearchMinMs); + } } return maxAgeMs; } diff --git a/tests/browser/deepResearch.test.ts b/tests/browser/deepResearch.test.ts new file mode 100644 index 00000000..7e0c227c --- /dev/null +++ b/tests/browser/deepResearch.test.ts @@ -0,0 +1,282 @@ +import { describe, expect, it, vi, beforeEach } from "vitest"; + +// Mock delay to resolve instantly in tests +vi.mock("../../src/browser/utils.js", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + delay: vi.fn(() => Promise.resolve()), + }; +}); + +import { + activateDeepResearch, + waitForResearchPlanAutoConfirm, + waitForDeepResearchCompletion, + checkDeepResearchStatus, +} from "../../src/browser/actions/deepResearch.js"; +import type { BrowserLogger } from "../../src/browser/types.js"; + +function createMockRuntime() { + return { + evaluate: vi.fn(), + }; +} + +function createMockLogger(): BrowserLogger { + const fn = vi.fn() as BrowserLogger; + fn.verbose = false; + fn.sessionLog = vi.fn(); + return fn; +} + +describe("activateDeepResearch", () => { + let mockRuntime: ReturnType; + let mockInput: Record; + let mockLogger: BrowserLogger; + + beforeEach(() => { + mockRuntime = createMockRuntime(); + mockInput = {}; + mockLogger = createMockLogger(); + }); + + it("activates Deep Research when all steps succeed", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "activated" } }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + 
).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith("Deep Research mode activated"); + }); + + it("returns early when already active", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "already-active" } }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + ).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith("Deep Research mode already active"); + }); + + it("throws when plus button is missing", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "plus-button-missing" } }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + ).rejects.toThrow(/composer plus button/); + }); + + it("throws with available options when Deep Research item missing", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { + status: "dropdown-item-missing", + available: ["Create image", "Web search"], + }, + }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + ).rejects.toThrow(/not found.*Create image/); + }); + + it("throws when pill does not confirm", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "pill-not-confirmed" } }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + ).rejects.toThrow(/pill did not appear/); + }); + + it("throws on unexpected result", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { status: "unknown-status" } }, + }); + await expect( + activateDeepResearch(mockRuntime as never, mockInput as never, mockLogger), + ).rejects.toThrow(/Unexpected result/); + }); +}); + +describe("waitForResearchPlanAutoConfirm", () => { + let mockRuntime: ReturnType; + let mockLogger: BrowserLogger; + + beforeEach(() => { + mockRuntime = createMockRuntime(); + mockLogger = 
createMockLogger(); + }); + + it("detects research plan via iframe and waits for auto-confirm", async () => { + // Phase A: plan detected via iframe + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { hasResearchIframe: true, hasResearchText: false } }, + }); + // Phase B: research started + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { hasLargeIframe: false, isResearching: true } }, + }); + + await expect( + waitForResearchPlanAutoConfirm(mockRuntime as never, mockLogger, 1_000), + ).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith( + expect.stringContaining("Research plan detected"), + ); + }); + + it("detects research plan via text content", async () => { + // Phase A: plan detected via text + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: { hasResearchIframe: false, hasResearchText: true } }, + }); + // Phase B: research started + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { hasLargeIframe: false, isResearching: true } }, + }); + + await expect( + waitForResearchPlanAutoConfirm(mockRuntime as never, mockLogger, 1_000), + ).resolves.toBeUndefined(); + }); + + it("handles plan not detected gracefully", async () => { + // All polls: nothing detected — use short timeout to avoid slow test + mockRuntime.evaluate.mockResolvedValue({ + result: { value: { hasResearchIframe: false, hasResearchText: false } }, + }); + + // Override planDeadline by passing very short auto-confirm wait + // The function internally waits up to 60s for plan detection; + // we can't easily shorten that, so we rely on the implementation + // returning gracefully when plan isn't found. + // Since the plan detection polls every 2s for up to 60s, this test + // would be slow. Instead, test that the function handles the timeout path. + // We'll use a trick: mock Date.now to advance time quickly. 
+ const realDateNow = Date.now; + let fakeNow = realDateNow(); + vi.spyOn(Date, "now").mockImplementation(() => { + fakeNow += 30_000; // Jump 30s each call + return fakeNow; + }); + + await expect( + waitForResearchPlanAutoConfirm(mockRuntime as never, mockLogger, 100), + ).resolves.toBeUndefined(); + expect(mockLogger).toHaveBeenCalledWith( + expect.stringContaining("not detected"), + ); + + vi.spyOn(Date, "now").mockRestore(); + }); +}); + +describe("waitForDeepResearchCompletion", () => { + let mockRuntime: ReturnType; + let mockLogger: BrowserLogger; + + beforeEach(() => { + mockRuntime = createMockRuntime(); + mockLogger = createMockLogger(); + }); + + it("detects completion via finished actions", async () => { + // First poll: still in progress + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { finished: false, stopVisible: true, textLength: 100, hasIframe: true }, + }, + }); + // Second poll: completed + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { finished: true, stopVisible: false, textLength: 5000, hasIframe: false }, + }, + }); + // extractDeepResearchResult → readAssistantSnapshot + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { + text: "Research report content", + html: "

Research report content

", + turnId: "t1", + messageId: "m1", + }, + }, + }); + // extractDeepResearchResult → captureAssistantMarkdown (copy button click) + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: null }, + }); + + const result = await waitForDeepResearchCompletion( + mockRuntime as never, + mockLogger, + 60_000, + ); + expect(result.text).toBe("Research report content"); + }); + + it("throws on timeout with metadata", async () => { + // All polls: never completed + mockRuntime.evaluate.mockResolvedValue({ + result: { + value: { finished: false, stopVisible: true, textLength: 500, hasIframe: true }, + }, + }); + + // Use very short timeout + await expect( + waitForDeepResearchCompletion(mockRuntime as never, mockLogger, 100), + ).rejects.toThrow(/did not complete/); + }); +}); + +describe("checkDeepResearchStatus", () => { + let mockRuntime: ReturnType; + let mockLogger: BrowserLogger; + + beforeEach(() => { + mockRuntime = createMockRuntime(); + mockLogger = createMockLogger(); + }); + + it("reports completed when finished actions visible", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { completed: true, inProgress: false, hasIframe: false, textLength: 5000 }, + }, + }); + const status = await checkDeepResearchStatus(mockRuntime as never, mockLogger); + expect(status.completed).toBe(true); + expect(status.inProgress).toBe(false); + expect(status.textLength).toBe(5000); + }); + + it("reports in-progress when iframe present", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { + value: { completed: false, inProgress: true, hasIframe: true, textLength: 0 }, + }, + }); + const status = await checkDeepResearchStatus(mockRuntime as never, mockLogger); + expect(status.completed).toBe(false); + expect(status.inProgress).toBe(true); + expect(status.hasIframe).toBe(true); + }); + + it("handles undefined result gracefully", async () => { + mockRuntime.evaluate.mockResolvedValueOnce({ + result: { value: undefined }, + 
}); + const status = await checkDeepResearchStatus(mockRuntime as never, mockLogger); + expect(status.completed).toBe(false); + expect(status.inProgress).toBe(false); + expect(status.textLength).toBe(0); + }); +}); diff --git a/tests/browser/reattach.test.ts b/tests/browser/reattach.test.ts index c5208bde..a58f7be7 100644 --- a/tests/browser/reattach.test.ts +++ b/tests/browser/reattach.test.ts @@ -110,6 +110,77 @@ describe("resumeBrowserSession", () => { expect(result.answerText).toBe("fallback"); expect(recoverSession).toHaveBeenCalled(); }); + + test("uses Deep Research flow when config.deepResearch is true and completed", async () => { + const runtime = { + chromePort: 51559, + chromeHost: "127.0.0.1", + chromeTargetId: "target-1", + tabUrl: "https://chatgpt.com/c/abc", + }; + const evaluate = vi.fn(async ({ expression }: { expression: string }) => { + if (expression === "location.href") { + return { result: { value: runtime.tabUrl } }; + } + if (expression === "1+1") { + return { result: { value: 2 } }; + } + // checkDeepResearchStatus: completed + if (expression.includes("completed") && expression.includes("inProgress")) { + return { + result: { + value: { completed: true, inProgress: false, hasIframe: false, textLength: 3000 }, + }, + }; + } + // extractDeepResearchResult → readAssistantSnapshot + if (expression.includes("data-message-author-role")) { + return { + result: { + value: { + text: "Deep Research report", + html: "

Deep Research report

", + turnId: "t1", + messageId: "m1", + }, + }, + }; + } + // captureAssistantMarkdown — copy button not found + return { result: { value: null } }; + }); + const listTargets = vi.fn( + async () => + [ + { targetId: "target-1", type: "page", url: runtime.tabUrl }, + ] satisfies FakeTarget[], + ) as unknown as () => Promise; + const connect = vi.fn( + async () => + ({ + // biome-ignore lint/style/useNamingConvention: mirrors DevTools protocol domain names + Runtime: { enable: vi.fn(), evaluate }, + // biome-ignore lint/style/useNamingConvention: mirrors DevTools protocol domain names + DOM: { enable: vi.fn() }, + close: vi.fn(async () => {}), + }) satisfies FakeClient, + ) as unknown as (options?: unknown) => Promise; + const logger = vi.fn() as BrowserLogger; + logger.verbose = true; + + const result = await resumeBrowserSession( + runtime, + { deepResearch: true, timeoutMs: 5_000 }, + logger, + { listTargets, connect }, + ); + + expect(result.answerText).toBe("Deep Research report"); + expect(result.answerMarkdown).toBe("Deep Research report"); + expect(logger).toHaveBeenCalledWith( + expect.stringContaining("already completed"), + ); + }); }); describe("reattach helpers", () => { diff --git a/tests/cli/browserConfig.test.ts b/tests/cli/browserConfig.test.ts index 70f6c407..8355d074 100644 --- a/tests/cli/browserConfig.test.ts +++ b/tests/cli/browserConfig.test.ts @@ -196,6 +196,32 @@ describe("buildBrowserConfig", () => { }), ).rejects.toThrow(/between 1 and 65535/i); }); + + test("deep-research flag sets deepResearch to true", async () => { + const config = await buildBrowserConfig({ model: "gpt-5.2-pro", deepResearch: true }); + expect(config.deepResearch).toBe(true); + }); + + test("deep-research forces model strategy to ignore", async () => { + const config = await buildBrowserConfig({ model: "gpt-5.2-pro", deepResearch: true }); + expect(config.modelStrategy).toBe("ignore"); + }); + + test("deep-research uses runtime default timeout (not set in config)", 
async () => { + const config = await buildBrowserConfig({ model: "gpt-5.2-pro", deepResearch: true }); + // Timeout is handled at runtime by DEEP_RESEARCH_DEFAULT_TIMEOUT_MS, + // not set in buildBrowserConfig unless explicitly provided + expect(config.timeoutMs).toBeUndefined(); + }); + + test("deep-research preserves explicit user timeout", async () => { + const config = await buildBrowserConfig({ + model: "gpt-5.2-pro", + deepResearch: true, + browserTimeout: "60m", + }); + expect(config.timeoutMs).toBe(3_600_000); + }); }); describe("resolveBrowserModelLabel", () => { diff --git a/tests/cli/deepResearchSessionTable.test.ts b/tests/cli/deepResearchSessionTable.test.ts new file mode 100644 index 00000000..01d63da1 --- /dev/null +++ b/tests/cli/deepResearchSessionTable.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, test } from "vitest"; +import { formatSessionTableRow } from "../../src/cli/sessionTable.js"; +import type { SessionMetadata } from "../../src/sessionStore.js"; + +function makeSession(overrides: Partial = {}): SessionMetadata { + return { + id: "test-session", + createdAt: new Date().toISOString(), + status: "completed", + mode: "browser", + model: "gpt-5.2-pro", + ...overrides, + } as SessionMetadata; +} + +describe("formatSessionTableRow — Deep Research label", () => { + test("shows browser/dr for Deep Research sessions", () => { + const meta = makeSession({ + browser: { config: { deepResearch: true } }, + }); + const row = formatSessionTableRow(meta, { rich: false }); + expect(row).toContain("browser/dr"); + }); + + test("shows regular browser for non-Deep Research sessions", () => { + const meta = makeSession({ + browser: { config: { deepResearch: false } }, + }); + const row = formatSessionTableRow(meta, { rich: false }); + expect(row).toContain("browser"); + expect(row).not.toContain("browser/dr"); + }); + + test("shows regular browser when browser config is absent", () => { + const meta = makeSession(); + const row = 
formatSessionTableRow(meta, { rich: false }); + expect(row).toContain("browser"); + expect(row).not.toContain("browser/dr"); + }); +});