diff --git a/src/mcp-server.ts b/src/mcp-server.ts index e781c19f..ebaf4a1a 100644 --- a/src/mcp-server.ts +++ b/src/mcp-server.ts @@ -61,11 +61,13 @@ import { extractRunId, getRunStore } from './run-harness/store'; import { substituteSecrets, redactSecrets, + redactSecretString, MissingSecretError, getSecretStore, } from './core/secrets'; import { currentRequestContext } from './observability/request-id'; import type { TransportMessageContext } from './transports'; +import { RecoveryTrajectoryLedger, type RecoveryResultStatus } from './recovery'; function redactActVariablesForTelemetry(toolName: string, args: Record): Record { @@ -335,6 +337,7 @@ export class MCPServer { private activityTracker: ActivityTracker | null = null; private operationController: OperationController | null = null; private hintEngine: HintEngine | null = null; + private recoveryLedger: RecoveryTrajectoryLedger | null = null; private options: MCPServerOptions; private profileWarningShown = false; private exposedTier: ToolTier = 1; @@ -438,6 +441,14 @@ export class MCPServer { this.hintEngine.enableLogging(hintsDir); this.hintEngine.enableLearning(hintsDir); + // Initialize passive recovery trajectory ledger (#1017). Default-on with the + // existing .openchrome harness logs; set OPENCHROME_RECOVERY_LEDGER=0 to disable. + if (process.env.OPENCHROME_RECOVERY_LEDGER !== '0') { + this.recoveryLedger = new RecoveryTrajectoryLedger({ + dirPath: path.join(process.cwd(), '.openchrome', 'recovery'), + }); + } + // Initialize task journal getTaskJournal().init().catch((err: unknown) => { console.error('[MCPServer] Task journal init failed:', err); @@ -1812,6 +1823,8 @@ export class MCPServer { // End activity tracking (success) this.activityTracker!.endCall(callId, 'success'); + result = redactSecrets(result); + this.recordRecoveryTrajectory(callId, toolName, sessionId, toolArgs, result.isError ? 
'no_progress' : 'success', result); getDashboardState().recordToolEnd(callId, 'success'); // Record Prometheus metrics @@ -1999,11 +2012,13 @@ export class MCPServer { return finalResult; } catch (error) { const message = formatError(error); + const redactedMessage = redactSecretString(message); const abortReason = isClientDisconnect(error) ? 'client_disconnect' : null; const aborted = abortReason !== null; // End activity tracking (error) this.activityTracker!.endCall(callId, aborted ? 'aborted' : 'error', message); + this.recordRecoveryTrajectory(callId, toolName, sessionId, toolArgs, aborted ? 'aborted' : 'error', undefined, redactedMessage); getDashboardState().recordToolEnd(callId, aborted ? 'aborted' : 'error', message); // Audit log failed invocation — same correlation fields as success path. @@ -2430,6 +2445,53 @@ export class MCPServer { * Get a tool handler by name (for internal server-side plan execution). * Returns null if the tool is not registered. */ + + private recordRecoveryTrajectory( + callId: string, + toolName: string, + sessionId: string, + toolArgs: Record, + resultStatus: RecoveryResultStatus, + result?: MCPResult, + error?: string, + ): void { + if (!this.recoveryLedger || !this.activityTracker) return; + + try { + const recent = this.activityTracker.getRecentCalls(3, sessionId); + const current = recent.find((call) => call.id === callId); + const tabId = typeof toolArgs.tabId === 'string' ? 
toolArgs.tabId : undefined; + const previousTrajectory = this.recoveryLedger.getLastNode(sessionId, tabId); + const previousFailed = + previousTrajectory?.resultStatus === 'error' || + previousTrajectory?.resultStatus === 'no_progress' || + previousTrajectory?.resultStatus === 'aborted'; + const recovered = + resultStatus === 'success' && + previousTrajectory !== undefined && + previousFailed && + previousTrajectory.toolName !== toolName; + const progressStatus = + resultStatus === 'error' || resultStatus === 'no_progress' || current?.result === 'error' + ? 'stuck' + : 'unknown'; + + this.recoveryLedger.record({ + sessionId, + tabId, + toolName, + args: toolArgs, + resultStatus: recovered ? 'recovered' : resultStatus, + progressStatus, + error, + result, + recoveryTool: recovered ? toolName : undefined, + }); + } catch { + // Recovery telemetry is best-effort and must not affect tool behavior. + } + } + getToolHandler(toolName: string): ToolHandler | null { const registry = this.tools.get(toolName); return registry ? registry.handler : null; diff --git a/src/recovery/index.ts b/src/recovery/index.ts new file mode 100644 index 00000000..61ec2f87 --- /dev/null +++ b/src/recovery/index.ts @@ -0,0 +1,12 @@ +export { + RecoveryTrajectoryLedger, + summarizeArgs, + summarizeResult, +} from './trajectory-ledger'; +export type { + RecoveryProgressStatus, + RecoveryResultStatus, + RecoveryTrajectoryLedgerOptions, + RecoveryTrajectoryNode, + RecoveryTrajectoryNodeInput, +} from './trajectory-ledger'; diff --git a/src/recovery/trajectory-ledger.ts b/src/recovery/trajectory-ledger.ts new file mode 100644 index 00000000..4eeb603d --- /dev/null +++ b/src/recovery/trajectory-ledger.ts @@ -0,0 +1,434 @@ +/** + * RecoveryTrajectoryLedger — bounded, best-effort telemetry for tool attempts. + * + * Records compact JSONL nodes that describe successful, failed, and recovered + * attempts without storing raw secrets, cookies, headers, screenshots, or full + * DOM payloads. 
/**
 * RecoveryTrajectoryLedger — bounded, best-effort telemetry for tool attempts.
 *
 * Records compact JSONL nodes that describe successful, failed, and recovered
 * attempts without storing raw secrets, cookies, headers, screenshots, or full
 * DOM payloads. The ledger is intentionally passive: it never replays actions
 * or changes browser behavior.
 */

/** Outcome of a single tool attempt as stored on a ledger node. */
export type RecoveryResultStatus = 'success' | 'error' | 'no_progress' | 'recovered' | 'aborted';
/** Coarse progress signal attached to a node; `'unknown'` is the default when the caller gives none. */
export type RecoveryProgressStatus = 'progressing' | 'stalling' | 'stuck' | 'unknown';

/** Raw description of one tool attempt, as handed to {@link RecoveryTrajectoryLedger.record}. */
export interface RecoveryTrajectoryNodeInput {
  sessionId: string;
  workflowId?: string;
  /** Falls back to a string `args.tabId` when omitted. */
  tabId?: string;
  /** Falls back to the last node recorded for the same session when omitted. */
  parentNodeId?: string;
  toolName: string;
  /** Raw tool arguments; only a sanitized summary is stored. */
  args?: Record<string, unknown>;
  resultStatus: RecoveryResultStatus;
  progressStatus?: RecoveryProgressStatus;
  /** Error text; used for the failure fingerprint and, if nothing else is available, the summary. */
  error?: string;
  /** Raw tool result; only a short text summary is stored. */
  result?: Record<string, unknown>;
  failureFingerprint?: string;
  recoveryTool?: string;
  evidenceHandle?: string;
  observationSummary?: string;
  reward?: number | null;
}

/** Compact node as kept in memory and appended to the JSONL file. */
export interface RecoveryTrajectoryNode {
  nodeId: string;
  timestamp: number;
  sessionId: string;
  workflowId?: string;
  tabId?: string;
  parentNodeId?: string;
  toolName: string;
  argsSummary?: Record<string, unknown>;
  resultStatus: RecoveryResultStatus;
  progressStatus: RecoveryProgressStatus;
  failureFingerprint?: string;
  recoveryTool?: string;
  evidenceHandle?: string;
  observationSummary?: string;
  reward?: number | null;
}

/** Construction options; every limit also has an `OPENCHROME_RECOVERY_LEDGER_*` env fallback. */
export interface RecoveryTrajectoryLedgerOptions {
  dirPath?: string;
  fileName?: string;
  maxNodes?: number;
  maxNodeBytes?: number;
  maxFileBytes?: number;
}

const DEFAULT_MAX_NODES = 500;
const DEFAULT_MAX_NODE_BYTES = 4096;
const DEFAULT_MAX_FILE_BYTES = 512 * 1024;
const SUMMARY_MAX_CHARS = 500;
const REDACTED = '[REDACTED]';
const HASHED_PREFIX = 'sha256:';

const SENSITIVE_KEY_RE = /(^|[_-])(password|passwd|pass|pwd|secret|token|api[_-]?key|authorization|auth|cookie|set-cookie|session|credential|private[_-]?key|otp|totp|pin)($|[_-])/i;
// NOTE(review): unanchored, so keys such as "domain" or "metadata" also match and get hashed — confirm this is intended.
const LARGE_VALUE_KEY_RE = /(html|dom|screenshot|image|data|body|content|headers?)/i;

/**
 * Append-only JSONL ledger of tool attempts with in-memory "last node" indexes.
 *
 * `record()` is synchronous from the caller's point of view: the node is built,
 * indexed and returned immediately, while the disk append and the subsequent
 * trimming run on a serialized promise queue. Every failure is logged and
 * swallowed so the ledger can never break the tool call that feeds it.
 */
export class RecoveryTrajectoryLedger {
  private readonly dirPath: string;
  private readonly filePath: string;
  private readonly maxNodes: number;
  private readonly maxNodeBytes: number;
  private readonly maxFileBytes: number;
  private readonly maxSessionIndexEntries: number;
  /** sessionId -> nodeId of the most recent node; Map insertion order doubles as LRU order. */
  private lastNodeBySession = new Map<string, string>();
  /** contextKey(sessionId, tabId) -> most recent node for that session/tab pair. */
  private lastNodeByContext = new Map<string, RecoveryTrajectoryNode>();
  /** Nodes that were queued but whose append has not finished yet. */
  private pendingNodes: RecoveryTrajectoryNode[] = [];
  /** Tail of the serialized write chain; never rejects. */
  private writeQueue: Promise<void> = Promise.resolve();

  /**
   * @param options Optional overrides. Explicit options win over environment
   *   variables, which win over built-in defaults. Limits are clamped:
   *   `maxNodes >= 1`, `maxNodeBytes >= 512`, `maxFileBytes >= maxNodeBytes`.
   */
  constructor(options: RecoveryTrajectoryLedgerOptions = {}) {
    this.dirPath = options.dirPath ?? path.join(process.cwd(), '.openchrome', 'recovery');
    this.filePath = path.join(this.dirPath, options.fileName ?? 'trajectory.jsonl');
    this.maxNodes = Math.max(
      1,
      options.maxNodes ?? readIntEnv('OPENCHROME_RECOVERY_LEDGER_MAX_NODES', DEFAULT_MAX_NODES),
    );
    this.maxNodeBytes = Math.max(
      512,
      options.maxNodeBytes ?? readIntEnv('OPENCHROME_RECOVERY_LEDGER_MAX_NODE_BYTES', DEFAULT_MAX_NODE_BYTES),
    );
    this.maxFileBytes = Math.max(
      this.maxNodeBytes,
      options.maxFileBytes ?? readIntEnv('OPENCHROME_RECOVERY_LEDGER_MAX_FILE_BYTES', DEFAULT_MAX_FILE_BYTES),
    );
    this.maxSessionIndexEntries = Math.max(16, this.maxNodes);
  }

  /** Absolute or relative path of the JSONL file this ledger appends to. */
  getPath(): string {
    return this.filePath;
  }

  /**
   * Best-effort append.
   *
   * @returns The built node when persistence is queued, `null` when the record
   *   was skipped before queueing (the reason is logged to stderr).
   */
  record(input: RecoveryTrajectoryNodeInput): RecoveryTrajectoryNode | null {
    try {
      const node = this.buildNode(input);
      const serialized = this.serializeBounded(node);
      const key = contextKey(input.sessionId, node.tabId);
      // delete + set moves the entry to the end of the Map's insertion order,
      // so the first key is always the least recently used session.
      this.lastNodeBySession.delete(input.sessionId);
      this.lastNodeBySession.set(input.sessionId, node.nodeId);
      this.lastNodeByContext.delete(key);
      this.lastNodeByContext.set(key, node);
      this.pruneSessionIndex();
      this.pendingNodes.push(node);
      this.prunePendingNodes();
      this.queuePersist(node, serialized);
      return node;
    } catch (err: unknown) {
      console.error(`[RecoveryTrajectoryLedger] record skipped: ${err instanceof Error ? err.message : String(err)}`);
      return null;
    }
  }

  /**
   * Read the most recent nodes, oldest first, merging persisted nodes with
   * nodes whose write is still queued.
   *
   * @param limit Maximum number of nodes to return; values `<= 0` (or NaN) yield `[]`.
   * @param sessionId When given, only nodes of that session are considered.
   * @returns Up to `limit` nodes. If the file cannot be read, only pending nodes are returned.
   */
  readRecent(limit = 50, sessionId?: string): RecoveryTrajectoryNode[] {
    const count = Math.floor(limit);
    // Guard explicitly: `slice(-0)` is `slice(0)` and would return the full history.
    if (!(count > 0)) return [];

    let nodes: RecoveryTrajectoryNode[];
    try {
      const content = fs.readFileSync(this.filePath, 'utf8');
      nodes = this.mergePending(parseRecoveryNodes(content.split('\n')));
    } catch {
      nodes = this.pendingNodes;
    }
    const filtered = sessionId ? nodes.filter((n) => n.sessionId === sessionId) : nodes;
    return filtered.slice(-count);
  }

  /** Return the last in-memory node for this session/tab without touching disk. */
  getLastNode(sessionId: string, tabId?: string): RecoveryTrajectoryNode | undefined {
    return this.lastNodeByContext.get(contextKey(sessionId, tabId));
  }

  /** Test hook for queued best-effort writes. Not needed by normal callers. */
  async flush(): Promise<void> {
    await this.writeQueue;
  }

  /** Turn raw input into a compact node: fill defaults, summarize, and drop `undefined` fields. */
  private buildNode(input: RecoveryTrajectoryNodeInput): RecoveryTrajectoryNode {
    const parentNodeId = input.parentNodeId ?? this.lastNodeBySession.get(input.sessionId);
    const observationSummary = input.observationSummary
      ?? summarizeResult(input.result)
      ?? summarizeText(input.error);
    const failureFingerprint = input.failureFingerprint
      ?? (input.error ? fingerprint(input.error) : undefined);
    const tabId = input.tabId ?? readString(input.args?.tabId);

    return pruneUndefined({
      nodeId: crypto.randomUUID(),
      timestamp: Date.now(),
      sessionId: input.sessionId,
      workflowId: input.workflowId,
      tabId,
      parentNodeId,
      toolName: input.toolName,
      argsSummary: summarizeArgs(input.args),
      resultStatus: input.resultStatus,
      progressStatus: input.progressStatus ?? 'unknown',
      failureFingerprint,
      recoveryTool: input.recoveryTool,
      evidenceHandle: input.evidenceHandle,
      observationSummary,
      reward: input.reward,
    });
  }

  /**
   * Serialize a node, shedding detail in three steps until it fits `maxNodeBytes`:
   * compact args + shorter summary, then essentials only, then identity fields only.
   * NOTE(review): the last step is returned without a size check, so a very long
   * `sessionId`/`toolName` can still exceed the limit.
   */
  private serializeBounded(node: RecoveryTrajectoryNode): string {
    const fits = (candidate: string): boolean => Buffer.byteLength(candidate, 'utf8') <= this.maxNodeBytes;

    const full = JSON.stringify(node);
    if (fits(full)) return full;

    const compacted: RecoveryTrajectoryNode = {
      ...node,
      observationSummary: truncate(node.observationSummary, 160),
      argsSummary: compactObject(node.argsSummary),
    };
    const compactedJson = JSON.stringify(compacted);
    if (fits(compactedJson)) return compactedJson;

    const essentials: RecoveryTrajectoryNode = pruneUndefined({
      nodeId: compacted.nodeId,
      timestamp: compacted.timestamp,
      sessionId: compacted.sessionId,
      tabId: compacted.tabId,
      parentNodeId: compacted.parentNodeId,
      toolName: compacted.toolName,
      resultStatus: compacted.resultStatus,
      progressStatus: compacted.progressStatus,
      failureFingerprint: compacted.failureFingerprint,
      observationSummary: truncate(compacted.observationSummary, 80),
      reward: compacted.reward,
    });
    const essentialsJson = JSON.stringify(essentials);
    if (fits(essentialsJson)) return essentialsJson;

    return JSON.stringify({
      nodeId: essentials.nodeId,
      timestamp: essentials.timestamp,
      sessionId: essentials.sessionId,
      toolName: essentials.toolName,
      resultStatus: essentials.resultStatus,
      progressStatus: essentials.progressStatus,
    });
  }

  /** Chain one append onto the write queue so appends and trims never interleave. */
  private queuePersist(node: RecoveryTrajectoryNode, serialized: string): void {
    this.writeQueue = this.writeQueue.then(() => this.persist(node, serialized));
  }

  /** Append one line and enforce bounds. Never rejects, so the queue cannot get stuck. */
  private async persist(node: RecoveryTrajectoryNode, serialized: string): Promise<void> {
    try {
      await fs.promises.mkdir(this.dirPath, { recursive: true });
      await fs.promises.appendFile(this.filePath, serialized + '\n', 'utf8');
      this.removePendingNode(node.nodeId);
      await this.enforceBoundsAsync();
    } catch (err: unknown) {
      // A node that could not be written is dropped from the pending view as well.
      this.removePendingNode(node.nodeId);
      console.error(`[RecoveryTrajectoryLedger] record skipped: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  /** Trim the file by byte size or node count, then drop index entries for sessions that no longer exist. */
  private async enforceBoundsAsync(): Promise<void> {
    try {
      const stat = await fs.promises.stat(this.filePath);
      if (stat.size <= this.maxFileBytes) {
        await this.enforceNodeCountOnlyAsync();
      } else {
        await this.rewriteTailAsync();
      }
      await this.pruneSessionIndexFromDisk();
    } catch {
      // best-effort
    }
  }

  /** Keep only the newest `maxNodes` lines when the file is within its byte budget. */
  private async enforceNodeCountOnlyAsync(): Promise<void> {
    const lines = await safeReadLinesAsync(this.filePath);
    if (lines.length <= this.maxNodes) return;
    await fs.promises.writeFile(this.filePath, lines.slice(-this.maxNodes).join('\n') + '\n', 'utf8');
  }

  /** Rewrite the file with the newest lines that fit both `maxNodes` and `maxFileBytes` (always at least one). */
  private async rewriteTailAsync(): Promise<void> {
    const lines = (await safeReadLinesAsync(this.filePath)).slice(-this.maxNodes);
    const kept: string[] = [];
    let bytes = 0;
    // Walk newest -> oldest so the most recent history survives.
    for (let i = lines.length - 1; i >= 0; i--) {
      const line = lines[i];
      const lineBytes = Buffer.byteLength(line + '\n', 'utf8');
      if (bytes + lineBytes > this.maxFileBytes && kept.length > 0) break;
      kept.unshift(line);
      bytes += lineBytes;
    }
    await fs.promises.writeFile(this.filePath, kept.join('\n') + (kept.length > 0 ? '\n' : ''), 'utf8');
  }

  /** Persisted nodes followed by pending nodes that are not on disk yet. */
  private mergePending(nodes: RecoveryTrajectoryNode[]): RecoveryTrajectoryNode[] {
    if (this.pendingNodes.length === 0) return nodes;
    const seen = new Set(nodes.map((node) => node.nodeId));
    return nodes.concat(this.pendingNodes.filter((node) => !seen.has(node.nodeId)));
  }

  private removePendingNode(nodeId: string): void {
    this.pendingNodes = this.pendingNodes.filter((node) => node.nodeId !== nodeId);
  }

  /** Cap the pending list at `maxNodes`, dropping the oldest entries. */
  private prunePendingNodes(): void {
    if (this.pendingNodes.length > this.maxNodes) {
      this.pendingNodes = this.pendingNodes.slice(-this.maxNodes);
    }
  }

  /** Evict least-recently-used sessions (and their per-tab entries) until the index fits its cap. */
  private pruneSessionIndex(): void {
    while (this.lastNodeBySession.size > this.maxSessionIndexEntries) {
      const oldestSessionId = this.lastNodeBySession.keys().next().value;
      // Compare against undefined: an empty-string session id is a valid key.
      if (oldestSessionId === undefined) break;
      this.lastNodeBySession.delete(oldestSessionId);
      const prefix = `${oldestSessionId}\u0000`;
      for (const key of this.lastNodeByContext.keys()) {
        if (key.startsWith(prefix)) this.lastNodeByContext.delete(key);
      }
    }
  }

  /**
   * Drop index entries for sessions that have no node left on disk.
   *
   * Sessions that still have a queued (not yet written) node count as live;
   * otherwise a session recorded while an earlier write is in flight would lose
   * its parent/last-node linkage as soon as that earlier write completes.
   */
  private async pruneSessionIndexFromDisk(): Promise<void> {
    try {
      const nodes = parseRecoveryNodes((await safeReadLinesAsync(this.filePath)).slice(-this.maxNodes));
      const liveSessions = new Set(nodes.map((node) => node.sessionId));
      // Read pendingNodes after the await so the snapshot is current.
      for (const pending of this.pendingNodes) liveSessions.add(pending.sessionId);

      for (const sessionId of this.lastNodeBySession.keys()) {
        if (!liveSessions.has(sessionId)) this.lastNodeBySession.delete(sessionId);
      }
      for (const [key, node] of this.lastNodeByContext.entries()) {
        if (!liveSessions.has(node.sessionId)) this.lastNodeByContext.delete(key);
      }
    } catch {
      // best-effort: fall through to the size-based prune below
    }
    this.pruneSessionIndex();
  }
}

/**
 * Produce a storable summary of tool arguments: sensitive keys are replaced by
 * `[REDACTED]`, long or payload-like strings by a short hash, and nesting,
 * array length and key count are capped.
 *
 * @returns The sanitized copy, or `undefined` when no args were given.
 */
export function summarizeArgs(args?: Record<string, unknown>): Record<string, unknown> | undefined {
  if (!args) return undefined;
  return sanitizeObject(args, 0) as Record<string, unknown>;
}

/**
 * Produce a short, redacted, single-line text summary of a tool result.
 *
 * Uses the `text` of every item when `result.content` is an array; otherwise the
 * first non-empty string among `_summary`, `summary`, `message`.
 *
 * @returns At most 500 characters plus an ellipsis, or `undefined` when there is no text.
 */
export function summarizeResult(result?: Record<string, unknown>): string | undefined {
  if (!result) return undefined;
  const content = result.content;
  if (Array.isArray(content)) {
    const text = content
      .map((item: unknown) =>
        item && typeof item === 'object' && 'text' in item ? String((item as { text?: unknown }).text ?? '') : '',
      )
      .filter(Boolean)
      .join('\n');
    return summarizeText(text);
  }
  const summary = readString(result._summary) ?? readString(result.summary) ?? readString(result.message);
  return summarizeText(summary);
}

/** Index key for a session/tab pair; NUL separates the parts, a missing tab maps to ''. */
function contextKey(sessionId: string, tabId?: string): string {
  return `${sessionId}\u0000${tabId ?? ''}`;
}

/** True when a key names a secret, either as written or after camelCase is split into snake_case. */
function isSensitiveKey(key: string): boolean {
  if (SENSITIVE_KEY_RE.test(key)) return true;
  const normalized = key
    .replace(/([a-z0-9])([A-Z])/g, '$1_$2')
    .replace(/[^A-Za-z0-9_-]+/g, '_');
  return SENSITIVE_KEY_RE.test(normalized);
}

/**
 * Recursively sanitize one value. `key` is the property name the value was found
 * under (array items inherit their array's key); `depth` caps recursion at two levels.
 */
function sanitizeObject(value: unknown, depth: number, key = ''): unknown {
  if (isSensitiveKey(key)) return REDACTED;
  if (value === null || value === undefined) return value;
  if (typeof value === 'string') {
    const redacted = redactSecretString(redactSensitiveText(value));
    if (LARGE_VALUE_KEY_RE.test(key) || redacted.length > 200) return hashValue(redacted);
    return redacted;
  }
  if (typeof value === 'number' || typeof value === 'boolean') return value;
  if (Array.isArray(value)) {
    if (depth >= 2) return `[array:${value.length}]`;
    return value.slice(0, 10).map((item: unknown) => sanitizeObject(item, depth + 1, key));
  }
  if (typeof value === 'object') {
    if (depth >= 2) return '[object]';
    const out: Record<string, unknown> = {};
    for (const [childKey, childValue] of Object.entries(value as Record<string, unknown>).slice(0, 25)) {
      out[childKey] = sanitizeObject(childValue, depth + 1, childKey);
    }
    return out;
  }
  return String(value);
}

/** Second-stage shrink of an args summary: first 8 keys, long strings hashed, nested values replaced by markers. */
function compactObject(value: unknown): Record<string, unknown> | undefined {
  if (!value || typeof value !== 'object' || Array.isArray(value)) return undefined;
  const out: Record<string, unknown> = {};
  for (const [key, child] of Object.entries(value as Record<string, unknown>).slice(0, 8)) {
    if (typeof child === 'string' && child.length > 80) out[key] = hashValue(child);
    else if (typeof child === 'object' && child !== null) out[key] = Array.isArray(child) ? `[array:${child.length}]` : '[object]';
    else out[key] = child;
  }
  return out;
}

/** Collapse whitespace, redact obvious credentials, and cap the length. */
function summarizeText(text?: string): string | undefined {
  if (!text) return undefined;
  const compact = text.replace(/\s+/g, ' ').trim();
  return truncate(redactSensitiveText(compact), SUMMARY_MAX_CHARS);
}

/** Pattern-based redaction of authorization headers, cookies, and `key=value` style secrets. */
function redactSensitiveText(text: string): string {
  return text
    .replace(/(authorization\s*[:=]\s*)(bearer\s+)?[^\s,;]+/gi, '$1[REDACTED]')
    .replace(/((?:set-)?cookie\s*[:=]\s*)[^\n]+/gi, '$1[REDACTED]')
    .replace(/((?:password|secret|token|api[_-]?key|session[_-]?id)\s*[:=]\s*)[^\s,;&]+/gi, '$1[REDACTED]')
    .replace(/([?&](?:password|secret|token|api[_-]?key|session[_-]?id)=)[^&#\s]+/gi, '$1[REDACTED]');
}

/** Cut to `max` UTF-16 code units and append an ellipsis when something was removed. */
function truncate(value: string | undefined, max: number): string | undefined {
  if (!value) return undefined;
  return value.length > max ? `${value.slice(0, max)}…` : value;
}

/** Stable id for an error message: case, digits and whitespace runs are normalized before hashing. */
function fingerprint(text: string): string {
  const normalized = text.toLowerCase().replace(/\d+/g, '').replace(/\s+/g, ' ').slice(0, 500);
  return hashValue(normalized);
}

/** `sha256:` followed by the first 16 hex characters of the SHA-256 digest. */
function hashValue(value: string): string {
  return HASHED_PREFIX + crypto.createHash('sha256').update(value).digest('hex').slice(0, 16);
}

/** The value itself when it is a non-empty string, otherwise `undefined`. */
function readString(value: unknown): string | undefined {
  return typeof value === 'string' && value.length > 0 ? value : undefined;
}

/** Positive integer from an environment variable, or `fallback` when unset, non-numeric or `<= 0`. */
function readIntEnv(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/** Delete every own property whose value is `undefined`. Mutates and returns the same object. */
function pruneUndefined<T extends Record<string, unknown>>(value: T): T {
  const bag: Record<string, unknown> = value;
  for (const key of Object.keys(bag)) {
    if (bag[key] === undefined) delete bag[key];
  }
  return value;
}

/**
 * Minimal shape check for a parsed JSONL line: a non-null object with string
 * `nodeId` and `sessionId`. Other fields are not validated.
 */
function isNodeLike(value: unknown): value is RecoveryTrajectoryNode {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as { nodeId?: unknown; sessionId?: unknown };
  return typeof candidate.nodeId === 'string' && typeof candidate.sessionId === 'string';
}

/** Parse JSONL lines, skipping blank lines, unparsable lines, and values that are not node-shaped. */
function parseRecoveryNodes(lines: string[]): RecoveryTrajectoryNode[] {
  const nodes: RecoveryTrajectoryNode[] = [];
  for (const line of lines) {
    if (line.trim().length === 0) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      // Valid JSON that is not a node (e.g. `null`, a number) would crash later `.sessionId` reads.
      if (isNodeLike(parsed)) nodes.push(parsed);
    } catch {
      // Best-effort telemetry must tolerate torn/truncated JSONL appends.
    }
  }
  return nodes;
}

/** Non-blank lines of a file, or `[]` when it cannot be read. */
async function safeReadLinesAsync(filePath: string): Promise<string[]> {
  try {
    return (await fs.promises.readFile(filePath, 'utf8')).split('\n').filter((line) => line.trim().length > 0);
  } catch {
    return [];
  }
}

// Start of the next diff section, which shared this source line; preserved verbatim:
// diff --git a/tests/e2e/harness/fixture-server.ts b/tests/e2e/harness/fixture-server.ts index b8d1fbcc..7bf1f9d8 100644 --- a/tests/e2e/harness/fixture-server.ts +++ b/tests/e2e/harness/fixture-server.ts @@ -73,6 +73,7 @@ export class FixtureServer { '/site-c': this.sitePage('Site C', 'Data Dashboard', '
NameValue
Row 1100
Row 2200
'), '/login': this.loginPage(), '/protected': this.protectedPage(), + '/recovery/stale-ref': this.recoveryStaleRefPage(), }; // Slow endpoint @@ -115,6 +116,30 @@ export class FixtureServer {

${heading}

${content}`; } + + private recoveryStaleRefPage(): string { + return `Recovery Stale Ref +

Recovery Stale Ref Fixture

+ +

initial

+ +`; + } + private loginPage(): string { return `Login

Login

diff --git a/tests/recovery/trajectory-ledger.test.ts b/tests/recovery/trajectory-ledger.test.ts new file mode 100644 index 00000000..82a7234d --- /dev/null +++ b/tests/recovery/trajectory-ledger.test.ts @@ -0,0 +1,162 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; + +import { RecoveryTrajectoryLedger, summarizeArgs, summarizeResult } from '../../src/recovery'; +import { EMPTY_SECRET_STORE, makeSecretStore, setSecretStore } from '../../src/core/secrets'; + +describe('RecoveryTrajectoryLedger', () => { + let dir: string; + + beforeEach(() => { + dir = fs.mkdtempSync(path.join(os.tmpdir(), 'oc-recovery-ledger-')); + }); + + afterEach(() => { + setSecretStore(EMPTY_SECRET_STORE); + fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('records bounded success, error, and recovered nodes', () => { + const ledger = new RecoveryTrajectoryLedger({ dirPath: dir, maxNodes: 10, maxNodeBytes: 2048 }); + + const first = ledger.record({ + sessionId: 's1', + toolName: 'click', + args: { tabId: 'tab-1', ref: 'old' }, + resultStatus: 'error', + error: 'Element ref is stale', + }); + const second = ledger.record({ + sessionId: 's1', + toolName: 'read_page', + args: { tabId: 'tab-1' }, + parentNodeId: first?.nodeId, + resultStatus: 'recovered', + result: { content: [{ type: 'text', text: 'Fresh refs available' }] }, + recoveryTool: 'read_page', + }); + + expect(first?.failureFingerprint).toMatch(/^sha256:/); + expect(second?.parentNodeId).toBe(first?.nodeId); + + const nodes = ledger.readRecent(10, 's1'); + expect(nodes).toHaveLength(2); + expect(nodes[0].resultStatus).toBe('error'); + expect(nodes[1].resultStatus).toBe('recovered'); + expect(nodes[1].observationSummary).toContain('Fresh refs'); + }); + + it('redacts sensitive args and hashes large payloads', () => { + const args = summarizeArgs({ + username: 'alice', + password: 'super-secret', + authorization: 'Bearer token', + html: '' + 'x'.repeat(500) + '', + 
nested: { apiKey: 'key-123', accessToken: 'tok-123', sessionId: 'sid-123', authHeader: 'Bearer x', visible: 'ok' }, + }); + + expect(args).toMatchObject({ + username: 'alice', + password: '[REDACTED]', + authorization: '[REDACTED]', + nested: { + apiKey: '[REDACTED]', + accessToken: '[REDACTED]', + sessionId: '[REDACTED]', + authHeader: '[REDACTED]', + visible: 'ok', + }, + }); + expect(String(args?.html)).toMatch(/^sha256:/); + }); + + it('redacts secret values embedded in ordinary arg fields', () => { + setSecretStore(makeSecretStore(new Map([['API_TOKEN', 'sk-live-ordinary-field']]))); + + const args = summarizeArgs({ + note: 'retry with sk-live-ordinary-field', + url: 'https://example.test/path?token=query-secret&ok=1', + query: 'authorization: Bearer header-secret', + }); + + expect(args?.note).toBe('retry with ${SECRET:API_TOKEN}'); + expect(args?.url).toBe('https://example.test/path?token=[REDACTED]&ok=1'); + expect(args?.query).toBe('authorization: [REDACTED]'); + }); + + it('summarizes result text without storing full content or obvious secrets', () => { + const summary = summarizeResult({ + content: [{ type: 'text', text: 'hello\n'.repeat(200) + ' authorization: Bearer abc token=xyz cookie: sid=123' }], + }); + + expect(summary!.length).toBeLessThanOrEqual(501); + expect(summary).toContain('hello'); + expect(summary).not.toContain('abc'); + expect(summary).not.toContain('sid=123'); + }); + + it('enforces max node count', () => { + const ledger = new RecoveryTrajectoryLedger({ dirPath: dir, maxNodes: 3, maxNodeBytes: 2048 }); + + for (let i = 0; i < 8; i++) { + ledger.record({ sessionId: 's1', toolName: `tool-${i}`, resultStatus: 'success' }); + } + + const nodes = ledger.readRecent(10, 's1'); + expect(nodes).toHaveLength(3); + expect(nodes.map((n) => n.toolName)).toEqual(['tool-5', 'tool-6', 'tool-7']); + }); + + it('skips malformed persisted JSONL entries without hiding valid history', async () => { + const ledger = new RecoveryTrajectoryLedger({ 
dirPath: dir, maxNodes: 10, maxNodeBytes: 2048 }); + const first = ledger.record({ sessionId: 's1', toolName: 'read_page', resultStatus: 'success' }); + await ledger.flush(); + fs.appendFileSync(ledger.getPath(), '{bad-json\n', 'utf8'); + const second = ledger.record({ sessionId: 's1', toolName: 'click', resultStatus: 'error' }); + + const nodes = ledger.readRecent(10, 's1'); + expect(nodes.map((node) => node.nodeId)).toEqual([first!.nodeId, second!.nodeId]); + }); + + it('queues disk writes asynchronously while immediate reads include pending nodes', async () => { + const ledger = new RecoveryTrajectoryLedger({ dirPath: dir, maxNodes: 10, maxNodeBytes: 2048 }); + + const node = ledger.record({ sessionId: 's1', toolName: 'read_page', resultStatus: 'success' }); + + expect(node).not.toBeNull(); + expect(ledger.readRecent(10, 's1').map((n) => n.nodeId)).toContain(node!.nodeId); + + await ledger.flush(); + + const persisted = fs.readFileSync(ledger.getPath(), 'utf8'); + expect(persisted).toContain(node!.nodeId); + }); + + it('bounds the session parent index and prunes it after trimming', async () => { + const ledger = new RecoveryTrajectoryLedger({ dirPath: dir, maxNodes: 3, maxNodeBytes: 2048 }); + + for (let i = 0; i < 40; i++) { + ledger.record({ sessionId: `s${i}`, toolName: `tool-${i}`, resultStatus: 'success' }); + expect((ledger as unknown as { lastNodeBySession: Map }).lastNodeBySession.size).toBeLessThanOrEqual(16); + } + + await ledger.flush(); + + expect((ledger as unknown as { lastNodeBySession: Map }).lastNodeBySession.size).toBeLessThanOrEqual(3); + expect(ledger.readRecent(10).map((n) => n.sessionId)).toEqual(['s37', 's38', 's39']); + }); + + it('does not throw when queued writes fail', async () => { + const filePath = path.join(dir, 'not-a-dir'); + fs.writeFileSync(filePath, 'block mkdir'); + const ledger = new RecoveryTrajectoryLedger({ dirPath: filePath }); + + const node = ledger.record({ sessionId: 's1', toolName: 'read_page', resultStatus: 
'success' }); + expect(node).not.toBeNull(); + expect(ledger.readRecent(10, 's1')).toHaveLength(1); + + await expect(ledger.flush()).resolves.toBeUndefined(); + expect(ledger.readRecent(10, 's1')).toHaveLength(0); + }); +});