import type { JournalEntry, TaskJournal } from './task-journal';

const MAX_FAILURE_GROUPS = 5;
const MAX_MILESTONES = 10;
const MAX_PENDING_STEPS = 10;
const MAX_RECOMMENDATIONS = 5;
const MAX_RECENT_ENTRIES = 1000;
const MAX_TABS = 20;
const MAX_SIGNATURE_LENGTH = 240;

/** The checkpoint fields the handoff summary reads; every field is optional. */
export interface HandoffCheckpointState {
  timestamp?: number;
  taskDescription?: string;
  completedSteps?: string[];
  pendingSteps?: string[];
  currentUrl?: string | null;
  tabStates?: Array<{ tabId: string; url: string; title: string }>;
}

/** Scoping options for {@link buildHandoffSummary}. */
export interface HandoffSummaryOptions {
  /** Epoch milliseconds; journal entries older than this are ignored. */
  since?: number;
  /** Keep only entries whose own or argument-level session id equals this value. */
  sessionId?: string;
  /** Echoed into `period.sourceCheckpointId`; values other than "current" add a note to `limits`. */
  checkpointId?: string;
  /** Checkpoint state to merge in; entries older than its `timestamp` are ignored. */
  checkpoint?: HandoffCheckpointState | null;
  /** Clock override (epoch milliseconds); defaults to `Date.now()`. */
  now?: number;
}

/** Bounded JSON document describing where a session stands and how to resume it. */
export interface HandoffSummary {
  schemaVersion: 1;
  period: {
    start: string | null;
    end: string;
    since: string | null;
    sourceCheckpointId: string | null;
  };
  currentState: {
    sessionId: string | null;
    currentUrl: string | null;
    tabs: Array<{ tabId: string; url: string; title: string }>;
    tabHealth: { status: 'unavailable'; reason: string };
    unavailable?: string[];
  };
  completedMilestones: Array<{
    ts: string;
    tool: string;
    sessionId: string;
    tabId?: string;
    summary: string;
  }>;
  recentFailures: Array<{
    tool: string;
    sessionId: string;
    tabId?: string;
    count: number;
    firstSeen: string;
    lastSeen: string;
    errorClass: string;
    signature: string;
    sampleSummary: string;
  }>;
  stuckSignals: {
    items: Array<{ ts: string; tool: string; summary: string }>;
    unavailable?: { reason: string };
  };
  pendingSteps: string[];
  recommendedRecoveryOptions: Array<{ reason: string; action: string }>;
  limits: string[];
}

/**
 * Formats epoch milliseconds as an ISO-8601 string.
 *
 * @returns `null` when `ts` is missing, non-finite, or outside the range a
 *   `Date` can represent.
 */
function iso(ts: number | undefined): string | null {
  if (typeof ts !== 'number' || !Number.isFinite(ts)) return null;
  const date = new Date(ts);
  // A finite value beyond roughly ±8.64e15 ms produces an invalid Date, and
  // toISOString() would throw a RangeError on it.
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}

/** True for values `JSON.stringify` would keep as an object property. */
function isJsonSerializable(value: unknown): boolean {
  return value !== undefined && typeof value !== 'function' && typeof value !== 'symbol';
}

/**
 * Serialises `value` with object keys in a fixed order, so two objects with
 * the same content always produce the same text.
 *
 * Follows `JSON.stringify` conventions: properties holding `undefined`,
 * functions or symbols are dropped, and such values become `null` inside
 * arrays or at the top level. Keys are ordered by UTF-16 code unit, so the
 * result does not depend on the host locale.
 */
function stableStringify(value: unknown): string {
  if (Array.isArray(value)) {
    return `[${value.map(item => stableStringify(item)).join(',')}]`;
  }
  if (value !== null && typeof value === 'object') {
    const parts = Object.entries(value as Record<string, unknown>)
      .filter(([, val]) => isJsonSerializable(val))
      .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
      .map(([key, val]) => `${JSON.stringify(key)}:${stableStringify(val)}`);
    return `{${parts.join(',')}}`;
  }
  // JSON.stringify yields undefined (not a string) for undefined/function/symbol.
  return JSON.stringify(value) ?? 'null';
}

/** Identity of a tool call: the tool name plus its order-independent arguments. */
function signatureFor(entry: JournalEntry): string {
  return `${entry.tool}:${stableStringify(entry.args)}`;
}

/** An entry belongs to a session if either its own id or `args.sessionId` matches. */
function entryMatchesSession(entry: JournalEntry, sessionId: string): boolean {
  return entry.sessionId === sessionId || entry.args.sessionId === sessionId;
}

/** Session id to report for an entry: a string `args.sessionId` wins over the entry's own id. */
function displaySessionId(entry: JournalEntry): string {
  return typeof entry.args.sessionId === 'string' ? entry.args.sessionId : entry.sessionId;
}

/** Keeps the entries that satisfy the session, `since` and checkpoint-timestamp scopes. */
function filterEntries(entries: JournalEntry[], opts: HandoffSummaryOptions): JournalEntry[] {
  const { sessionId, since } = opts;
  const checkpointTs = opts.checkpoint?.timestamp;
  return entries.filter(entry => {
    if (sessionId && !entryMatchesSession(entry, sessionId)) return false;
    if (since && entry.ts < since) return false;
    if (checkpointTs && entry.ts < checkpointTs) return false;
    return true;
  });
}

/** Returns `requested` when given, otherwise the display session id of the last entry that has one. */
function latestSessionId(entries: JournalEntry[], requested?: string): string | null {
  if (requested) return requested;
  for (let i = entries.length - 1; i >= 0; i -= 1) {
    const id = displaySessionId(entries[i]);
    if (id) return id;
  }
  return null;
}

/** Running aggregate for one failure group. */
interface FailureGroup {
  signature: string;
  first: JournalEntry;
  last: JournalEntry;
  count: number;
}

/**
 * Groups failed entries by (reported session id, tab id, call signature) and
 * returns at most `MAX_FAILURE_GROUPS` groups, most recently seen first.
 *
 * `first` and `last` follow input order. NOTE(review): this presumes the
 * journal hands entries over oldest-first; confirm against
 * `TaskJournal.getRecent`.
 */
function buildFailureGroups(entries: JournalEntry[]): HandoffSummary['recentFailures'] {
  const groups = new Map<string, FailureGroup>();
  for (const entry of entries) {
    if (entry.ok) continue;
    const signature = signatureFor(entry);
    // Key on the same session id that the group reports, so identical calls
    // are not split into look-alike groups by a differing transport-level id.
    const key = `${displaySessionId(entry)}:${entry.tabId ?? 'none'}:${signature}`;
    const group = groups.get(key);
    if (group) {
      group.last = entry;
      group.count += 1;
    } else {
      groups.set(key, { signature, first: entry, last: entry, count: 1 });
    }
  }

  return Array.from(groups.values())
    .sort((a, b) => b.last.ts - a.last.ts)
    .slice(0, MAX_FAILURE_GROUPS)
    .map(({ signature, first, last, count }) => ({
      tool: first.tool,
      sessionId: displaySessionId(first),
      tabId: first.tabId,
      count,
      firstSeen: iso(first.ts)!,
      lastSeen: iso(last.ts)!,
      errorClass: 'unavailable:journals_store_success_flag_only',
      signature: signature.slice(0, MAX_SIGNATURE_LENGTH),
      sampleSummary: first.summary,
    }));
}

/**
 * Derives recovery hints from failed tool names, the known URL and pending
 * steps. Always returns at least one recommendation and at most
 * `MAX_RECOMMENDATIONS`.
 */
function buildRecommendations(
  summary: Pick<HandoffSummary, 'recentFailures' | 'currentState' | 'pendingSteps'>,
): HandoffSummary['recommendedRecoveryOptions'] {
  const recs: HandoffSummary['recommendedRecoveryOptions'] = [];
  const failedTools = new Set(summary.recentFailures.map(f => f.tool));

  if (failedTools.has('find') || failedTools.has('query_dom') || failedTools.has('read_page')) {
    recs.push({
      reason: 'Recent page-observation failures were recorded.',
      action: 'Refresh the DOM snapshot and retry with a broader selector or text query.',
    });
  }
  if (failedTools.has('interact') || failedTools.has('fill_form')) {
    recs.push({
      reason: 'Recent interaction failures were recorded.',
      action: 'Re-read the page and choose a currently visible, stable target before retrying.',
    });
  }
  if (summary.currentState.currentUrl) {
    recs.push({
      reason: 'Checkpoint includes a current URL.',
      action: 'Resume from the latest tab evidence before repeating earlier completed steps.',
    });
  }
  if (summary.pendingSteps.length > 0) {
    recs.push({
      reason: 'Checkpoint includes pending steps.',
      action: 'Continue with the first pending step after validating the page is still on the expected URL.',
    });
  }
  if (recs.length === 0) {
    recs.push({
      reason: 'No specific failure pattern is available.',
      action: 'Load the checkpoint or call oc_journal recent before retrying expensive work.',
    });
  }

  return recs.slice(0, MAX_RECOMMENDATIONS);
}

/**
 * Builds a bounded handoff summary from the most recent journal entries and an
 * optional checkpoint.
 *
 * @param journal Source of journal entries; up to `MAX_RECENT_ENTRIES` are read via `getRecent`.
 * @param opts Scope (session, `since`, checkpoint) and clock override.
 * @returns A summary whose list fields are capped by the `MAX_*` constants of this module.
 */
export function buildHandoffSummary(journal: TaskJournal, opts: HandoffSummaryOptions = {}): HandoffSummary {
  const now = opts.now ?? Date.now();
  const allEntries = journal.getRecent(MAX_RECENT_ENTRIES);
  const entries = filterEntries(allEntries, opts);
  const checkpoint = opts.checkpoint ?? null;
  const startTs = entries[0]?.ts ?? checkpoint?.timestamp ?? opts.since;
  const endTs = entries[entries.length - 1]?.ts ?? now;

  const journalMilestones: HandoffSummary['completedMilestones'] = entries
    .filter(entry => entry.milestone)
    .map(entry => ({
      ts: iso(entry.ts)!,
      tool: entry.tool,
      sessionId: displaySessionId(entry),
      tabId: entry.tabId,
      summary: entry.summary,
    }));

  const datedCheckpointTs =
    typeof checkpoint?.timestamp === 'number' && Number.isFinite(checkpoint.timestamp)
      ? checkpoint.timestamp
      : undefined;
  const checkpointMilestones: HandoffSummary['completedMilestones'] = (checkpoint?.completedSteps ?? []).map(
    step => ({
      ts: iso(datedCheckpointTs ?? now)!,
      tool: 'oc_checkpoint',
      sessionId: opts.sessionId ?? latestSessionId(entries) ?? 'unknown',
      summary: `✓ ${step}`,
    }),
  );

  // filterEntries drops everything older than a dated checkpoint, so its steps
  // precede every retained journal milestone. Listing them first keeps the
  // result chronological and lets the cap drop the oldest items. An undated
  // checkpoint is stamped `now`, so it stays last.
  const orderedMilestones =
    datedCheckpointTs !== undefined
      ? [...checkpointMilestones, ...journalMilestones]
      : [...journalMilestones, ...checkpointMilestones];
  const completedMilestones = orderedMilestones.slice(-MAX_MILESTONES);

  const pendingSteps = (checkpoint?.pendingSteps ?? []).slice(0, MAX_PENDING_STEPS);
  const currentState: HandoffSummary['currentState'] = {
    sessionId: latestSessionId(entries, opts.sessionId),
    currentUrl: checkpoint?.currentUrl ?? checkpoint?.tabStates?.[0]?.url ?? null,
    tabs: (checkpoint?.tabStates ?? []).slice(0, MAX_TABS),
    tabHealth: { status: 'unavailable', reason: 'checkpoint artifacts do not persist live tab health' },
  };
  const unavailable: string[] = [];
  if (!checkpoint) unavailable.push('checkpoint_state');
  if (!currentState.currentUrl && currentState.tabs.length === 0) unavailable.push('tab_state');
  if (unavailable.length > 0) currentState.unavailable = unavailable;

  const partial = {
    recentFailures: buildFailureGroups(entries),
    currentState,
    pendingSteps,
  };

  const limits = [
    `journal entries scanned: ${Math.min(allEntries.length, MAX_RECENT_ENTRIES)} / ${MAX_RECENT_ENTRIES}`,
    `milestones capped at ${MAX_MILESTONES}`,
    `failure groups capped at ${MAX_FAILURE_GROUPS}`,
    'tool arguments are read from sanitized journal entries only',
  ];
  if (opts.checkpointId && opts.checkpointId !== 'current') {
    limits.push('checkpointId is recorded in the response but only the current checkpoint file exists in this OpenChrome version');
  }
  if (entries.length === 0) limits.push('no journal entries matched the requested scope');

  return {
    schemaVersion: 1,
    period: {
      start: iso(startTs),
      end: iso(endTs)!,
      since: iso(opts.since),
      sourceCheckpointId: opts.checkpointId ?? (checkpoint ? 'current' : null),
    },
    currentState,
    completedMilestones,
    recentFailures: partial.recentFailures,
    stuckSignals: {
      items: [],
      unavailable: { reason: 'HintEngine/ProgressTracker stuck signals are not persisted in journal artifacts yet' },
    },
    pendingSteps,
    recommendedRecoveryOptions: buildRecommendations(partial),
    limits,
  };
}
result.data : null; +} + const handler: ToolHandler = async ( _sessionId: string, args: Record, @@ -168,8 +174,8 @@ const handler: ToolHandler = async ( } if (action === 'load') { - const result = await readFileSafe(checkpointPath); - if (!result.success || !result.data) { + const cp = await readCurrentCheckpoint(); + if (!cp) { return { content: [ { @@ -180,7 +186,6 @@ const handler: ToolHandler = async ( }; } - const cp = result.data; const ageMs = Date.now() - cp.timestamp; const ageHours = Math.round((ageMs / 3600000) * 10) / 10; diff --git a/src/tools/journal.ts b/src/tools/journal.ts index 561de0ff..eae5921a 100644 --- a/src/tools/journal.ts +++ b/src/tools/journal.ts @@ -6,17 +6,19 @@ import { MCPServer } from '../mcp-server'; import { MCPToolDefinition, MCPResult, ToolHandler } from '../types/mcp'; import { getTaskJournal } from '../journal/task-journal'; +import { buildHandoffSummary } from '../journal/handoff-summary'; +import { readCurrentCheckpoint } from './checkpoint'; const definition: MCPToolDefinition = { name: 'oc_journal', description: - 'Query the tool call journal. Actions: "summary" (milestone-based overview for context restoration), "recent" (last N entries with full detail).\n\nWhen to use: Reviewing session history, restoring context after a long task, or auditing what tools ran.\nWhen NOT to use: Use read_page or inspect to check the current live page state rather than past actions.', + 'Query the tool call journal. 
Actions: "summary" (milestone overview), "recent" (last N entries), "handoff_summary" (compact JSON resume handoff).\nWhen to use: Reviewing session history, restoring context, or auditing past tool calls.\nWhen NOT to use: Use read_page or inspect to check the current live page state.', inputSchema: { type: 'object', properties: { action: { type: 'string', - enum: ['summary', 'recent'], + enum: ['summary', 'recent', 'handoff_summary'], description: 'Query type', }, count: { @@ -27,6 +29,18 @@ const definition: MCPToolDefinition = { type: 'string', description: 'Filter by tool name', }, + sessionId: { + type: 'string', + description: '(handoff_summary) Limit journal evidence to one session id', + }, + checkpointId: { + type: 'string', + description: '(handoff_summary) Source checkpoint id. Only "current" is backed by the existing checkpoint store.', + }, + includeCheckpoint: { + type: 'boolean', + description: '(handoff_summary) Include the current checkpoint file when available. Default: true', + }, since: { type: 'string', description: 'ISO timestamp or relative ("1h", "30m")', @@ -96,6 +110,23 @@ const handler: ToolHandler = async ( return { content: [{ type: 'text', text: lines.join('\n') }] }; } + + if (action === 'handoff_summary') { + const includeCheckpoint = args.includeCheckpoint !== false; + const checkpoint = includeCheckpoint ? 
await readCurrentCheckpoint() : null; + const handoff = buildHandoffSummary(journal, { + since, + sessionId: args.sessionId as string | undefined, + checkpointId: args.checkpointId as string | undefined, + checkpoint, + }); + + return { + content: [{ type: 'text', text: JSON.stringify(handoff, null, 2) }], + handoffSummary: handoff, + }; + } + if (action === 'recent') { const count = Math.min(Math.max((args.count as number) || 20, 1), 100); let entries = journal.getRecent(count); @@ -122,7 +153,7 @@ const handler: ToolHandler = async ( return { content: [{ type: 'text', text: lines.join('\n') }] }; } - return { content: [{ type: 'text', text: `Unknown action: ${action}. Use "summary" or "recent".` }] }; + return { content: [{ type: 'text', text: `Unknown action: ${action}. Use "summary", "recent", or "handoff_summary".` }] }; }; export function registerJournalTool(server: MCPServer): void { diff --git a/tests/e2e/scenarios/journal-handoff.e2e.ts b/tests/e2e/scenarios/journal-handoff.e2e.ts new file mode 100644 index 00000000..ca5a4d30 --- /dev/null +++ b/tests/e2e/scenarios/journal-handoff.e2e.ts @@ -0,0 +1,121 @@ +/** + * E2E: oc_journal handoff_summary for long-running session recovery (#1027) + * + * Validates that a real OpenChrome MCP server can produce a compact handoff + * summary from persisted journal/checkpoint artifacts, and that the summary is + * still available after an MCP process restart with the same HOME. 
/** Reads the `port` field from `.e2e-state.json` in the current working directory. */
function getFixturePort(): number {
  const statePath = path.join(process.cwd(), '.e2e-state.json');
  const raw = fs.readFileSync(statePath, 'utf-8');
  return JSON.parse(raw).port;
}

/** Parses the first JSON object embedded in a tool result's text. */
function parseJsonText(result: MCPToolResult): Record<string, unknown> {
  return parseFirstJsonObject(result.text);
}

/**
 * Extracts and parses the first balanced `{...}` object found in `text`.
 *
 * Braces inside JSON string literals (including after escaped quotes) are
 * ignored while matching. Throws when `text` contains no `{`, or when the
 * object opened by the first `{` is never closed.
 */
function parseFirstJsonObject(text: string): Record<string, unknown> {
  const open = text.indexOf('{');
  if (open < 0) throw new Error(`No JSON object in text: ${text}`);

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let cursor = open;
  while (cursor < text.length) {
    const char = text.charAt(cursor);
    cursor += 1;
    if (insideString) {
      // Within a string only a backslash escape or the closing quote matters.
      if (skipNext) skipNext = false;
      else if (char === '\\') skipNext = true;
      else if (char === '"') insideString = false;
    } else if (char === '"') {
      insideString = true;
    } else if (char === '{') {
      nesting += 1;
    } else if (char === '}') {
      nesting -= 1;
      if (nesting === 0) return JSON.parse(text.slice(open, cursor)) as Record<string, unknown>;
    }
  }
  throw new Error(`Unterminated JSON object in text: ${text}`);
}
parseFirstJsonObject(nav.text); + const tabId = navData.tabId as string; + + await mcp.callTool('read_page', { sessionId, tabId }); + + const checkpoint = await mcp.callTool('oc_checkpoint', { + action: 'save', + taskDescription: 'Handoff E2E task', + completedSteps: ['navigate fixture', 'read fixture page'], + pendingSteps: ['recover missing selector'], + extractedData: { fixture: 'site-a' }, + }); + expect(checkpoint.text).toContain('saved'); + + await mcp.callTool('javascript_tool', { + sessionId, + tabId, + code: 'throw new Error(\"handoff failure\")', + apiKey: 'should-be-redacted', + }); + + const handoffResult = await mcp.callTool('oc_journal', { + action: 'handoff_summary', + sessionId, + checkpointId: 'current', + }); + const handoff = parseJsonText(handoffResult); + + expect(handoff.schemaVersion).toBe(1); + expect((handoff.currentState as Record).sessionId).toBe(sessionId); + expect((handoff.currentState as Record).currentUrl).toContain('/site-a'); + expect((handoff.completedMilestones as unknown[]).length).toBeGreaterThanOrEqual(1); + expect((handoff.pendingSteps as unknown[])).toContain('recover missing selector'); + expect(JSON.stringify(handoff.recentFailures)).toContain('javascript_tool'); + expect(JSON.stringify(handoff.recentFailures)).toContain('[REDACTED]'); + expect(JSON.stringify(handoff.recentFailures)).not.toContain('should-be-redacted'); + expect((handoff.recommendedRecoveryOptions as unknown[]).length).toBeGreaterThan(0); + expect(JSON.stringify(handoff.limits)).toContain('journal entries scanned'); + + await mcp.restart(); + + const resumedResult = await mcp.callTool('oc_journal', { + action: 'handoff_summary', + sessionId, + checkpointId: 'current', + }); + const resumed = parseJsonText(resumedResult); + + expect((resumed.currentState as Record).currentUrl).toContain('/site-a'); + expect((resumed.pendingSteps as unknown[])).toContain('recover missing selector'); + 
/**
 * Creates a new, uniquely named scratch directory under the OS temp directory
 * and returns its path.
 *
 * Uses `fs.mkdtempSync` so the directory is created atomically with a unique
 * suffix. A hand-rolled random name combined with a recursive `mkdirSync`
 * would silently reuse an already existing directory on a collision.
 */
function tmpDir(): string {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'handoff-summary-test-'));
}

/**
 * Builds a journal entry for tests: a successful `navigate` milestone in
 * session `sess-a`, with any field replaceable through `overrides`.
 */
function entry(overrides: Partial<JournalEntry> = {}): JournalEntry {
  return {
    ts: Date.parse('2026-05-12T10:00:00.000Z'),
    tool: 'navigate',
    sessionId: 'sess-a',
    args: { url: 'https://example.test', token: '[REDACTED]' },
    durationMs: 10,
    ok: true,
    summary: '✓ → https://example.test',
    milestone: true,
    ...overrides,
  };
}
checkpoint pending state', () => { + journal.record(entry({ ts: Date.parse('2026-05-12T10:00:00.000Z'), summary: '✓ → https://a.test' })); + journal.record(entry({ ts: Date.parse('2026-05-12T10:02:00.000Z'), tool: 'fill_form', summary: '✓ Fill form (1 fields)' })); + + const summary = buildHandoffSummary(journal, { + checkpoint: { + timestamp: Date.parse('2026-05-12T09:59:00.000Z'), + pendingSteps: ['submit search', 'collect results'], + currentUrl: 'https://a.test/form', + tabStates: [{ tabId: 'tab-1', url: 'https://a.test/form', title: 'Form' }], + }, + }); + + expect(summary.currentState.currentUrl).toBe('https://a.test/form'); + expect(summary.currentState.tabs).toHaveLength(1); + expect(summary.completedMilestones.map(m => m.summary)).toEqual(['✓ → https://a.test', '✓ Fill form (1 fields)']); + expect(summary.pendingSteps).toEqual(['submit search', 'collect results']); + expect(summary.recommendedRecoveryOptions.some(r => r.reason.includes('pending steps'))).toBe(true); + }); + + + + it('surfaces checkpoint completed steps as resume milestones when journal entries are outside checkpoint scope', () => { + journal.record(entry({ ts: Date.parse('2026-05-12T09:00:00.000Z'), summary: '✓ before checkpoint' })); + + const summary = buildHandoffSummary(journal, { + sessionId: 'sess-a', + checkpoint: { + timestamp: Date.parse('2026-05-12T10:00:00.000Z'), + completedSteps: ['already completed'], + }, + }); + + expect(summary.completedMilestones.map(m => m.summary)).toEqual(['✓ already completed']); + expect(summary.completedMilestones[0].tool).toBe('oc_checkpoint'); + }); + + it('groups failures by sanitized signature without leaking sensitive values', () => { + journal.record(entry({ + ok: false, + tool: 'find', + summary: '✗ Find "submit"', + milestone: undefined, + args: { selector: '#submit', password: '[REDACTED]' }, + })); + journal.record(entry({ + ts: Date.parse('2026-05-12T10:01:00.000Z'), + ok: false, + tool: 'find', + summary: '✗ Find "submit"', + milestone: 
undefined, + args: { password: '[REDACTED]', selector: '#submit' }, + })); + + const summary = buildHandoffSummary(journal); + + expect(summary.recentFailures).toHaveLength(1); + expect(summary.recentFailures[0].count).toBe(2); + expect(summary.recentFailures[0].signature).toContain('[REDACTED]'); + expect(summary.recentFailures[0].signature).not.toContain('hunter2'); + expect(summary.recommendedRecoveryOptions[0].action).toContain('Refresh the DOM snapshot'); + }); + + it('scopes by since, checkpoint timestamp, and session id', () => { + journal.record(entry({ ts: Date.parse('2026-05-12T09:00:00.000Z'), sessionId: 'sess-a', summary: '✓ old' })); + journal.record(entry({ ts: Date.parse('2026-05-12T10:00:00.000Z'), sessionId: 'sess-b', summary: '✓ other session' })); + journal.record(entry({ ts: Date.parse('2026-05-12T10:01:00.000Z'), sessionId: 'sess-a', summary: '✓ included' })); + + const summary = buildHandoffSummary(journal, { + since: Date.parse('2026-05-12T09:30:00.000Z'), + sessionId: 'sess-a', + checkpoint: { timestamp: Date.parse('2026-05-12T10:00:30.000Z') }, + }); + + expect(summary.completedMilestones.map(m => m.summary)).toEqual(['✓ included']); + expect(summary.currentState.sessionId).toBe('sess-a'); + }); + + + + it('scopes to caller-provided sessionId stored in sanitized tool args', () => { + journal.record(entry({ sessionId: 'mcp-default', args: { sessionId: 'logical-session' }, summary: '✓ logical' })); + + const summary = buildHandoffSummary(journal, { sessionId: 'logical-session' }); + + expect(summary.currentState.sessionId).toBe('logical-session'); + expect(summary.completedMilestones.map(m => m.sessionId)).toEqual(['logical-session']); + }); + + it('caps large synthetic output deterministically', () => { + for (let i = 0; i < 50; i += 1) { + journal.record(entry({ + ts: Date.parse('2026-05-12T10:00:00.000Z') + i, + ok: i % 2 === 0, + tool: i % 2 === 0 ? 
'navigate' : 'interact', + args: { selector: `#button-${i}`, token: '[REDACTED]' }, + summary: `${i % 2 === 0 ? '✓' : '✗'} step ${i}`, + milestone: true, + })); + } + + const summary = buildHandoffSummary(journal, { + checkpoint: { pendingSteps: Array.from({ length: 30 }, (_, i) => `pending ${i}`) }, + }); + + expect(summary.completedMilestones).toHaveLength(10); + expect(summary.recentFailures).toHaveLength(5); + expect(summary.pendingSteps).toHaveLength(10); + expect(JSON.stringify(summary).length).toBeLessThan(12000); + }); +}); diff --git a/tests/tools/journal.test.ts b/tests/tools/journal.test.ts index 8322cd9c..47d32c15 100644 --- a/tests/tools/journal.test.ts +++ b/tests/tools/journal.test.ts @@ -283,6 +283,44 @@ describe('oc_journal tool', () => { }); }); + + // ─── action=handoff_summary ───────────────────────────────────────────── + + describe('action=handoff_summary', () => { + test('returns structured handoff summary JSON and result field', async () => { + const now = Date.parse('2026-05-12T10:00:00.000Z'); + mockGetRecent.mockReturnValue([ + makeEntry({ ts: now, sessionId: 'sess-handoff', summary: '✓ → https://example.com' }), + makeEntry({ + ts: now + 1, + sessionId: 'sess-handoff', + tool: 'find', + ok: false, + milestone: undefined, + summary: '✗ Find "missing"', + args: { query: 'missing', apiKey: '[REDACTED]' }, + }), + ]); + + const result = await handler('default', { + action: 'handoff_summary', + sessionId: 'sess-handoff', + since: '2026-05-12T09:00:00.000Z', + includeCheckpoint: false, + }); + const text: string = result.content[0].text; + const parsed = JSON.parse(text); + + expect(parsed.schemaVersion).toBe(1); + expect(parsed.currentState.sessionId).toBe('sess-handoff'); + expect(parsed.completedMilestones).toHaveLength(1); + expect(parsed.recentFailures).toHaveLength(1); + expect(parsed.recentFailures[0].signature).toContain('[REDACTED]'); + expect(parsed.recommendedRecoveryOptions.length).toBeGreaterThan(0); + 
expect(result.handoffSummary).toEqual(parsed); + }); + }); + // ─── unknown action ────────────────────────────────────────────────────── describe('unknown action', () => { @@ -297,6 +335,7 @@ describe('oc_journal tool', () => { expect(result.content[0].text).toContain('Unknown action: invalid'); expect(result.content[0].text).toContain('"summary"'); expect(result.content[0].text).toContain('"recent"'); + expect(result.content[0].text).toContain('"handoff_summary"'); }); });