From 8565816d57f4fffe51d5b62372e9b07ab9cc1647 Mon Sep 17 00:00:00 2001
From: shaun0927 <70629228+shaun0927@users.noreply.github.com>
Date: Wed, 13 May 2026 00:34:07 +0900
Subject: [PATCH] Learn evidence-backed recovery ranking policies

Add a bounded recovery policy learner so repeated evidence-backed recoveries can bias future candidate ranking without bypassing safety gates or executing actions.

Constraint: Learned recovery policy must be advisory only and must not store sensitive page content or credentials.
Rejected: A separate autonomous recovery memory/executor | duplicates PatternLearner direction and risks unsafe automatic action.
Confidence: high
Scope-risk: narrow
Directive: Learned policies may rank candidates but must never override risk gates or opt-in execution controls.
Tested: npm test -- --runTestsByPath tests/recovery/policy-learner.test.ts tests/recovery/candidate-ranker.test.ts; npm run build
Not-tested: Two-run live OpenChrome policy promotion transcript.
---
 src/recovery/candidate-ranker.ts      |   5 +-
 src/recovery/index.ts                 |   3 +
 src/recovery/policy-learner.ts        | 264 ++++++++++++++++++++++++++
 tests/recovery/policy-learner.test.ts | 120 ++++++++++++
 4 files changed, 391 insertions(+), 1 deletion(-)
 create mode 100644 src/recovery/policy-learner.ts
 create mode 100644 tests/recovery/policy-learner.test.ts

diff --git a/src/recovery/candidate-ranker.ts b/src/recovery/candidate-ranker.ts
index 833b2e2d0..e3adf65ce 100644
--- a/src/recovery/candidate-ranker.ts
+++ b/src/recovery/candidate-ranker.ts
@@ -1,4 +1,5 @@
 /** Advisory recovery candidate ranking for stuck/stalling hints. */
+import { policyRankBoost, type RecoveryPolicyRecord } from './policy-learner';
 import { scoreRecoveryOutcome } from './reward-scorer';
 
 export type RecoveryCandidateRisk = 'read_only' | 'reversible' | 'side_effect_possible';
@@ -25,6 +26,7 @@ export interface RecoveryCandidateRankInput {
   isError: boolean;
   recentCalls: RecentToolCallLike[];
   maxCandidates?: number;
+  policies?: RecoveryPolicyRecord[];
 }
 
 const BLIND_INTERACTION_TOOLS = new Set(['click', 'interact', 'computer', 'form_input', 'fill_form', 'javascript_tool']);
@@ -50,7 +52,8 @@ export function rankRecoveryCandidates(input: RecoveryCandidateRankInput): Recov
       observationOnly: READ_TOOLS.has(candidate.tool),
       repeatedFailureCount: repeatedToolCount(input.recentCalls, input.toolName),
     });
-    const score = clamp(candidate.baseScore + evidence.score * 0.25 - repeatedPenalty - sameFailedPenalty - riskPenalty);
+    const learnedBoost = policyRankBoost(input.policies, candidate.tool, candidate.risk);
+    const score = clamp(candidate.baseScore + evidence.score * 0.25 + learnedBoost - repeatedPenalty - sameFailedPenalty - riskPenalty);
     candidates.push({ ...candidate, score });
   };
 
diff --git a/src/recovery/index.ts b/src/recovery/index.ts
index 3c1df83f4..5b42a819f 100644
--- a/src/recovery/index.ts
+++ b/src/recovery/index.ts
@@ -16,3 +16,6 @@ export type { RecoveryRewardClassification, RecoveryRewardInput, RecoveryRewardS
 
 export { formatCandidateHint, rankRecoveryCandidates } from './candidate-ranker';
 export type { RecoveryCandidate, RecoveryCandidateRankInput, RecoveryCandidateRisk, RecentToolCallLike } from './candidate-ranker';
+
+export { policyRankBoost, RecoveryPolicyLearner } from './policy-learner';
+export type { RecoveryPolicyOutcome, RecoveryPolicyRecord, RecoveryPolicyLearnerOptions } from './policy-learner';
diff --git a/src/recovery/policy-learner.ts b/src/recovery/policy-learner.ts
new file mode 100644
index 000000000..cbaea6420
--- /dev/null
+++ b/src/recovery/policy-learner.ts
@@ -0,0 +1,264 @@
+/**
+ * Evidence-backed recovery policy learning. Advisory only.
+ *
+ * This module records recovery outcomes and derives a small, bounded ranking
+ * boost from them; it never executes tools. Records created here hold only the
+ * failure fingerprint, a sanitized hostname, tool names, the safety class,
+ * counters and timestamps.
+ */
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+
+/** A single observed recovery attempt reported to the learner. */
+export interface RecoveryPolicyOutcome {
+  failureFingerprint: string;
+  domain?: string;
+  triggerTool: string;
+  recoveryTool: string;
+  safetyClass: 'read_only' | 'reversible' | 'side_effect_possible';
+  evidenceBacked: boolean;
+  succeeded: boolean;
+}
+
+/** Aggregated statistics for one fingerprint/domain/trigger/recovery/safety key. */
+export interface RecoveryPolicyRecord {
+  id: string;
+  failureFingerprint: string;
+  domain?: string;
+  triggerTool: string;
+  recoveryTool: string;
+  safetyClass: RecoveryPolicyOutcome['safetyClass'];
+  attempts: number;
+  successes: number;
+  failures: number;
+  /** successes / attempts, rounded to three decimals. */
+  confidence: number;
+  /** True once the attempt and confidence thresholds are both met. */
+  promoted: boolean;
+  firstSeen: number;
+  lastSeen: number;
+}
+
+/** On-disk layout of the policy store. */
+interface PolicyStoreFile {
+  version: number;
+  updatedAt: number;
+  policies: RecoveryPolicyRecord[];
+}
+
+export interface RecoveryPolicyLearnerOptions {
+  /** Store location; defaults to .openchrome/recovery/learned-policies.json under the current working directory. */
+  filePath?: string;
+  /** Attempts required before promotion (default 3). */
+  minAttempts?: number;
+  /** Confidence required before promotion (default 0.67). */
+  minConfidence?: number;
+  /** Maximum number of records kept (default 500); least recently seen are evicted first. */
+  maxPolicies?: number;
+}
+
+/** Upper bound on the score boost a learned policy may contribute. */
+const MAX_RANK_BOOST = 0.25;
+
+/** Safety classes accepted when validating persisted records. */
+const SAFETY_CLASSES: ReadonlySet<string> = new Set(['read_only', 'reversible', 'side_effect_possible']);
+
+/** Persists recovery outcomes and promotes recoveries that keep succeeding. */
+export class RecoveryPolicyLearner {
+  private readonly filePath: string;
+  private readonly minAttempts: number;
+  private readonly minConfidence: number;
+  private readonly maxPolicies: number;
+  private readonly policies = new Map<string, RecoveryPolicyRecord>();
+
+  constructor(options: RecoveryPolicyLearnerOptions = {}) {
+    this.filePath = options.filePath ?? path.join(process.cwd(), '.openchrome', 'recovery', 'learned-policies.json');
+    this.minAttempts = options.minAttempts ?? 3;
+    this.minConfidence = options.minConfidence ?? 0.67;
+    this.maxPolicies = options.maxPolicies ?? 500;
+    this.load();
+  }
+
+  /**
+   * Folds one outcome into its policy record and persists the store.
+   *
+   * @returns A copy of the updated record, or null when the outcome is not
+   * evidence-backed or lacks a fingerprint or tool name.
+   */
+  record(outcome: RecoveryPolicyOutcome): RecoveryPolicyRecord | null {
+    if (!outcome.evidenceBacked) return null;
+    if (!outcome.failureFingerprint || !outcome.triggerTool || !outcome.recoveryTool) return null;
+
+    const key = policyKey(outcome);
+    const now = Date.now();
+    let record = this.policies.get(key);
+    if (!record) {
+      record = {
+        id: key,
+        failureFingerprint: outcome.failureFingerprint,
+        domain: sanitizeDomain(outcome.domain),
+        triggerTool: outcome.triggerTool,
+        recoveryTool: outcome.recoveryTool,
+        safetyClass: outcome.safetyClass,
+        attempts: 0,
+        successes: 0,
+        failures: 0,
+        confidence: 0,
+        promoted: false,
+        firstSeen: now,
+        lastSeen: now,
+      };
+      this.policies.set(key, record);
+    }
+
+    record.attempts++;
+    if (outcome.succeeded) record.successes++;
+    else record.failures++;
+    record.lastSeen = now;
+    this.refreshDerived(record);
+    this.enforceCap();
+    this.save();
+    return { ...record };
+  }
+
+  /** Returns copies of the promoted policies matching the filter, highest confidence first. */
+  getPolicies(filter: { failureFingerprint?: string; domain?: string; triggerTool?: string } = {}): RecoveryPolicyRecord[] {
+    const domain = sanitizeDomain(filter.domain);
+    return Array.from(this.policies.values())
+      .filter((policy) => policy.promoted)
+      .filter((policy) => !filter.failureFingerprint || policy.failureFingerprint === filter.failureFingerprint)
+      .filter((policy) => !domain || policy.domain === domain || policy.domain === undefined)
+      .filter((policy) => !filter.triggerTool || policy.triggerTool === filter.triggerTool)
+      .sort((a, b) => b.confidence - a.confidence || b.successes - a.successes)
+      .map((policy) => ({ ...policy }));
+  }
+
+  /** Removes every record and persists the empty store. */
+  clear(): void {
+    this.policies.clear();
+    this.save();
+  }
+
+  /** Recomputes confidence and promotion from the counters and the current thresholds. */
+  private refreshDerived(record: RecoveryPolicyRecord): void {
+    record.confidence = round(record.successes / record.attempts);
+    record.promoted = record.attempts >= this.minAttempts && record.confidence >= this.minConfidence;
+  }
+
+  /** Loads persisted records; malformed entries and unreadable files are ignored. */
+  private load(): void {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(fs.readFileSync(this.filePath, 'utf8'));
+    } catch {
+      return; // No persisted policies yet, or the file is not valid JSON.
+    }
+    if (typeof parsed !== 'object' || parsed === null) return;
+    const stored = (parsed as { policies?: unknown }).policies;
+    if (!Array.isArray(stored)) return;
+    for (const entry of stored) {
+      if (!isPolicyRecord(entry)) continue;
+      const record: RecoveryPolicyRecord = { ...entry };
+      // Derived fields are recomputed so they always agree with the counters
+      // and with the thresholds this instance was configured with.
+      this.refreshDerived(record);
+      this.policies.set(record.id, record);
+    }
+    // Same eviction rule as record(): keep the most recently seen entries.
+    this.enforceCap();
+  }
+
+  /** Writes the store as JSON; failures are logged and otherwise ignored. */
+  private save(): void {
+    try {
+      fs.mkdirSync(path.dirname(this.filePath), { recursive: true });
+      const payload: PolicyStoreFile = {
+        version: 1,
+        updatedAt: Date.now(),
+        policies: Array.from(this.policies.values()),
+      };
+      fs.writeFileSync(this.filePath, JSON.stringify(payload, null, 2), 'utf8');
+    } catch (err) {
+      console.error(`[RecoveryPolicyLearner] save skipped: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
+
+  /** Evicts the least recently seen records until the cap is respected. */
+  private enforceCap(): void {
+    if (this.policies.size <= this.maxPolicies) return;
+    const sorted = Array.from(this.policies.values()).sort((a, b) => a.lastSeen - b.lastSeen);
+    for (const policy of sorted.slice(0, this.policies.size - this.maxPolicies)) {
+      this.policies.delete(policy.id);
+    }
+  }
+}
+
+/**
+ * Returns the ranking boost learned for a recovery tool, in [0, 0.25].
+ *
+ * Only promoted policies whose tool and safety class both match are
+ * considered; the largest boost among them is returned, whatever their order.
+ */
+export function policyRankBoost(
+  policies: RecoveryPolicyRecord[] | undefined,
+  recoveryTool: string,
+  safetyClass: RecoveryPolicyOutcome['safetyClass'],
+): number {
+  if (!policies || policies.length === 0) return 0;
+  let best = 0;
+  for (const policy of policies) {
+    if (!policy.promoted) continue;
+    if (policy.recoveryTool !== recoveryTool || policy.safetyClass !== safetyClass) continue;
+    const boost = policy.confidence * 0.2 + Math.min(policy.successes, 5) * 0.01;
+    // Non-finite values (for example from hand-built records) must not reach the ranker.
+    if (Number.isFinite(boost) && boost > best) best = boost;
+  }
+  return Math.min(MAX_RANK_BOOST, best);
+}
+
+/** Builds the map key identifying one policy; '*' stands for "no domain". */
+function policyKey(outcome: RecoveryPolicyOutcome): string {
+  return [
+    outcome.failureFingerprint,
+    sanitizeDomain(outcome.domain) ?? '*',
+    outcome.triggerTool,
+    outcome.recoveryTool,
+    outcome.safetyClass,
+  ].join('|');
+}
+
+/** Reduces a URL or host string to a lowercase hostname; undefined when nothing usable remains. */
+function sanitizeDomain(domain: string | undefined): string | undefined {
+  if (!domain) return undefined;
+  try {
+    const host = new URL(domain.includes('://') ? domain : `https://${domain}`).hostname.toLowerCase();
+    // Some URLs (file:///...) parse successfully but carry no host.
+    return host || undefined;
+  } catch {
+    return domain.toLowerCase().replace(/[^a-z0-9.-]/g, '').slice(0, 120) || undefined;
+  }
+}
+
+/** True for a non-negative integer. */
+function isCount(value: unknown): value is number {
+  return typeof value === 'number' && Number.isInteger(value) && value >= 0;
+}
+
+/** Type guard for records read from disk; confidence and promoted are not checked because load() recomputes them. */
+function isPolicyRecord(value: unknown): value is RecoveryPolicyRecord {
+  if (typeof value !== 'object' || value === null) return false;
+  const raw = value as Record<string, unknown>;
+  const { safetyClass, domain, attempts, successes, failures } = raw;
+  const textFields = [raw.id, raw.failureFingerprint, raw.triggerTool, raw.recoveryTool];
+  if (!textFields.every((field) => typeof field === 'string' && field.length > 0)) return false;
+  if (domain !== undefined && typeof domain !== 'string') return false;
+  if (typeof safetyClass !== 'string' || !SAFETY_CLASSES.has(safetyClass)) return false;
+  if (!isCount(attempts) || !isCount(successes) || !isCount(failures)) return false;
+  if (attempts === 0 || successes + failures !== attempts) return false;
+  return Number.isFinite(raw.firstSeen) && Number.isFinite(raw.lastSeen);
+}
+
+/** Rounds to three decimal places. */
+function round(value: number): number {
+  return Number(value.toFixed(3));
+}
diff --git a/tests/recovery/policy-learner.test.ts b/tests/recovery/policy-learner.test.ts
new file mode 100644
index 000000000..175387045
--- /dev/null
+++ b/tests/recovery/policy-learner.test.ts
@@ -0,0 +1,120 @@
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+
+import { policyRankBoost, RecoveryPolicyLearner, rankRecoveryCandidates } from '../../src/recovery';
+
+describe('RecoveryPolicyLearner', () => {
+  let dir: string;
+  let filePath: string;
+
+  beforeEach(() => {
+    dir = fs.mkdtempSync(path.join(os.tmpdir(), 'oc-policy-'));
+    filePath = path.join(dir, 'policies.json');
+  });
+
+  afterEach(() => fs.rmSync(dir, { recursive: true, force: true }));
+
+  it('promotes repeated evidence-backed recoveries and persists them', () => {
+    const learner = new RecoveryPolicyLearner({ filePath, minAttempts: 3, minConfidence: 0.67 });
+    for (let i = 0; i < 3; i++) {
+      learner.record({
+        failureFingerprint: 'stale-ref',
+        domain: 'https://example.com/path',
+        triggerTool: 'interact',
+        recoveryTool: 'read_page',
+        safetyClass: 'read_only',
+        evidenceBacked: true,
+        succeeded: true,
+      });
+    }
+
+    const reloaded = new RecoveryPolicyLearner({ filePath, minAttempts: 3, minConfidence: 0.67 });
+    const policies = reloaded.getPolicies({ failureFingerprint: 'stale-ref', domain: 'example.com' });
+    expect(policies).toHaveLength(1);
+    expect(policies[0]).toMatchObject({ recoveryTool: 'read_page', promoted: true, confidence: 1 });
+  });
+
+  it('does not promote ambiguous outcomes and downgrades confidence on failures', () => {
+    const learner = new RecoveryPolicyLearner({ filePath, minAttempts: 2, minConfidence: 0.75 });
+    expect(learner.record({
+      failureFingerprint: 'stale-ref',
+      triggerTool: 'interact',
+      recoveryTool: 'read_page',
+      safetyClass: 'read_only',
+      evidenceBacked: false,
+      succeeded: true,
+    })).toBeNull();
+
+    learner.record({ failureFingerprint: 'stale-ref', triggerTool: 'interact', recoveryTool: 'read_page', safetyClass: 'read_only', evidenceBacked: true, succeeded: true });
+    learner.record({ failureFingerprint: 'stale-ref', triggerTool: 'interact', recoveryTool: 'read_page', safetyClass: 'read_only', evidenceBacked: true, succeeded: false });
+
+    expect(learner.getPolicies({ failureFingerprint: 'stale-ref' })).toHaveLength(0);
+  });
+
+  it('biases ranking without bypassing safety gates', () => {
+    const learner = new RecoveryPolicyLearner({ filePath, minAttempts: 1, minConfidence: 0.5 });
+    learner.record({ failureFingerprint: 'timeout', triggerTool: 'navigate', recoveryTool: 'tabs_context', safetyClass: 'read_only', evidenceBacked: true, succeeded: true });
+    const policies = learner.getPolicies({ failureFingerprint: 'timeout', triggerTool: 'navigate' });
+
+    const candidates = rankRecoveryCandidates({
+      toolName: 'navigate',
+      resultText: 'Navigation timeout',
+      isError: true,
+      recentCalls: [{ toolName: 'navigate', result: 'error', error: 'Navigation timeout' }],
+      policies,
+    });
+
+    expect(candidates[0].tool).toBe('tabs_context');
+    expect(candidates[0].risk).toBe('read_only');
+    expect(candidates.every((candidate) => !candidate.blockedReason || candidate.risk !== 'read_only')).toBe(true);
+  });
+
+  it('boosts only promoted policies regardless of their order', () => {
+    const base = {
+      id: 'timeout|*|navigate|tabs_context|read_only',
+      failureFingerprint: 'timeout',
+      triggerTool: 'navigate',
+      recoveryTool: 'tabs_context',
+      safetyClass: 'read_only' as const,
+      attempts: 5,
+      failures: 0,
+      firstSeen: 0,
+      lastSeen: 0,
+    };
+    const unpromoted = { ...base, successes: 5, confidence: 1, promoted: false };
+    const weak = { ...base, successes: 1, confidence: 0.5, promoted: true };
+    const strong = { ...base, successes: 5, confidence: 1, promoted: true };
+
+    expect(policyRankBoost([unpromoted], 'tabs_context', 'read_only')).toBe(0);
+    expect(policyRankBoost([weak, strong], 'tabs_context', 'read_only')).toBeCloseTo(0.25);
+    expect(policyRankBoost([strong, weak], 'tabs_context', 'read_only')).toBeCloseTo(0.25);
+  });
+
+  it('drops malformed persisted records and recomputes promotion on load', () => {
+    const valid = {
+      id: 'stale-ref|*|interact|read_page|read_only',
+      failureFingerprint: 'stale-ref',
+      triggerTool: 'interact',
+      recoveryTool: 'read_page',
+      safetyClass: 'read_only',
+      attempts: 4,
+      successes: 1,
+      failures: 3,
+      confidence: 1,
+      promoted: true,
+      firstSeen: 1,
+      lastSeen: 2,
+    };
+    const payload = { version: 1, updatedAt: 0, policies: [null, { id: 'broken' }, valid] };
+    fs.writeFileSync(filePath, JSON.stringify(payload), 'utf8');
+
+    const learner = new RecoveryPolicyLearner({ filePath, minAttempts: 3, minConfidence: 0.67 });
+    expect(learner.getPolicies()).toHaveLength(0);
+
+    for (let i = 0; i < 8; i++) {
+      learner.record({ failureFingerprint: 'stale-ref', triggerTool: 'interact', recoveryTool: 'read_page', safetyClass: 'read_only', evidenceBacked: true, succeeded: true });
+    }
+    expect(learner.getPolicies()).toMatchObject([{ attempts: 12, successes: 9, confidence: 0.75, promoted: true }]);
+  });
+});