From 5cc02278283ec858f2a729ca5b56f08e4622d001 Mon Sep 17 00:00:00 2001
From: shaun0927 <70629228+shaun0927@users.noreply.github.com>
Date: Wed, 13 May 2026 00:31:08 +0900
Subject: [PATCH] Recover failed plans with opt-in safe candidates

Add bounded read-only recovery attempts for compiled plans so safe state-refresh candidates can repair stale or blocked steps under explicit opt-in budgets.\n\nConstraint: Live browser recovery must be opt-in, budgeted, and limited to read-only candidates.\nRejected: General MCTS/live browser branching | unsafe for authenticated sessions and unnecessary for the current harness goal.\nConfidence: medium\nScope-risk: moderate\nDirective: Do not widen candidate execution beyond read-only tools without an explicit destructive-action gate and live verification.\nTested: npm test -- --runTestsByPath tests/orchestration/plan-executor-bounded-recovery.test.ts; npm run build\nNot-tested: Full execute_plan MCP transcript against Chrome fixture.
---
 src/orchestration/plan-executor.ts            | 244 +++++++++++++-----
 src/tools/orchestration.ts                    |  12 +-
 src/types/plan-cache.ts                       |  35 ++-
 .../plan-executor-bounded-recovery.test.ts    |  72 ++++++
 4 files changed, 293 insertions(+), 70 deletions(-)
 create mode 100644 tests/orchestration/plan-executor-bounded-recovery.test.ts
diff --git a/src/orchestration/plan-executor.ts b/src/orchestration/plan-executor.ts
index 3138164f..3da99142 100644
--- a/src/orchestration/plan-executor.ts
+++ b/src/orchestration/plan-executor.ts
@@ -8,11 +8,14 @@
 import { MCPResult, ToolHandler } from '../types/mcp';
 import {
   CompiledPlan,
-  CompiledStep,
-  PlanErrorHandler,
-  PlanExecutionResult,
-} from '../types/plan-cache';
-import { withTimeout } from '../utils/with-timeout';
+  CompiledStep,
+  PlanErrorHandler,
+  PlanExecutionOptions,
+  PlanExecutionResult,
+  PlanRecoveryAttempt,
+} from '../types/plan-cache';
+import { rankRecoveryCandidates } from '../recovery';
+import { withTimeout } from '../utils/with-timeout';
 
 /**
  * Recursively substitute ${varName} templates in a value using the params map.
@@ -137,13 +140,15 @@ export class PlanExecutor {
     this.toolResolver = toolResolver;
   }
 
-  async execute(
-    plan: CompiledPlan,
-    sessionId: string,
-    runtimeParams: Record<string, unknown>
-  ): Promise<PlanExecutionResult> {
-    const startTime = Date.now();
-    let stepsExecuted = 0;
+  async execute(
+    plan: CompiledPlan,
+    sessionId: string,
+    runtimeParams: Record<string, unknown>,
+    options: PlanExecutionOptions = {}
+  ): Promise<PlanExecutionResult> {
+    const startTime = Date.now();
+    let stepsExecuted = 0;
+    const recoveryAttempts: PlanRecoveryAttempt[] = [];
 
     // 1. Build params map: plan defaults first, runtime overrides on top
     const params: Record<string, unknown> = {};
@@ -154,14 +159,25 @@ export class PlanExecutor {
     }
     Object.assign(params, runtimeParams);
 
-    const failure = (error: string): PlanExecutionResult => ({
-      success: false,
-      planId: plan.id,
-      error,
-      durationMs: Date.now() - startTime,
-      stepsExecuted,
-      totalSteps: plan.steps.length,
-    });
+    const withRecovery = <T extends PlanExecutionResult>(result: T): T => {
+      if (options.boundedRecovery?.enabled) {
+        result.recovery = {
+          enabled: true,
+          attempts: recoveryAttempts,
+          exhausted: countExecutedRecoveryAttempts(recoveryAttempts) >= (options.boundedRecovery.maxToolCalls ?? 2),
+        };
+      }
+      return result;
+    };
+
+    const failure = (error: string): PlanExecutionResult => withRecovery({
+      success: false,
+      planId: plan.id,
+      error,
+      durationMs: Date.now() - startTime,
+      stepsExecuted,
+      totalSteps: plan.steps.length,
+    });
 
     // 2. Execute each step sequentially
     for (const step of plan.steps) {
@@ -200,14 +216,18 @@ export class PlanExecutor {
           params,
           stepsExecuted
         );
-        if (recovered !== null) {
-          stepsExecuted = recovered.stepsExecuted;
-          // Merge any params updates from recovery into our params
-          Object.assign(params, recovered.params);
-          continue;
-        }
-
-        return failure(`Step ${step.order} (${step.tool}) failed: ${errMsg}`);
+        if (recovered !== null) {
+          stepsExecuted = recovered.stepsExecuted;
+          // Merge any params updates from recovery into our params
+          Object.assign(params, recovered.params);
+          continue;
+        }
+
+        const bounded = await this.tryBoundedRecovery(step, errMsg, sessionId, params, options, recoveryAttempts);
+        stepsExecuted += bounded.stepsExecuted;
+        if (bounded.recovered) continue;
+
+        return failure(`Step ${step.order} (${step.tool}) failed: ${errMsg}`);
       }
 
       // d. Check for error result
@@ -223,13 +243,17 @@ export class PlanExecutor {
           params,
           stepsExecuted
         );
-        if (recovered !== null) {
-          stepsExecuted = recovered.stepsExecuted;
-          Object.assign(params, recovered.params);
-          continue;
-        }
-
-        return failure(`Step ${step.order} (${step.tool}) returned error: ${errMsg}`);
+        if (recovered !== null) {
+          stepsExecuted = recovered.stepsExecuted;
+          Object.assign(params, recovered.params);
+          continue;
+        }
+
+        const bounded = await this.tryBoundedRecovery(step, errMsg, sessionId, params, options, recoveryAttempts);
+        stepsExecuted += bounded.stepsExecuted;
+        if (bounded.recovered) continue;
+
+        return failure(`Step ${step.order} (${step.tool}) returned error: ${errMsg}`);
       }
 
       // e. Check for empty result (before storing) — may trigger empty_result handler
@@ -242,13 +266,16 @@ export class PlanExecutor {
           params,
           stepsExecuted
         );
-        if (recovered !== null) {
-          stepsExecuted = recovered.stepsExecuted;
-          Object.assign(params, recovered.params);
-          continue;
-        }
-        // No handler for empty — treat as non-fatal, just skip storing
-      }
+        if (recovered !== null) {
+          stepsExecuted = recovered.stepsExecuted;
+          Object.assign(params, recovered.params);
+          continue;
+        }
+        const bounded = await this.tryBoundedRecovery(step, 'empty result', sessionId, params, options, recoveryAttempts);
+        stepsExecuted += bounded.stepsExecuted;
+        if (bounded.recovered) continue;
+        // No handler for empty — treat as non-fatal, just skip storing
+      }
 
       // f. Parse and store result if requested
       if (step.parseResult && step.parseResult.storeAs) {
@@ -270,29 +297,103 @@ export class PlanExecutor {
     const criteriaError = validateSuccessCriteria(plan.successCriteria, params);
     if (criteriaError) {
       console.error(`[PlanExecutor] Success criteria failed for plan=${plan.id}: ${criteriaError}`);
-      return {
-        success: false,
-        planId: plan.id,
-        error: `Success criteria not met: ${criteriaError}`,
-        durationMs: Date.now() - startTime,
-        stepsExecuted,
-        totalSteps: plan.steps.length,
-      };
+      return withRecovery({
+        success: false,
+        planId: plan.id,
+        error: `Success criteria not met: ${criteriaError}`,
+        durationMs: Date.now() - startTime,
+        stepsExecuted,
+        totalSteps: plan.steps.length,
+      });
     }
 
     // 4. Return success with all collected params as data
-    return {
-      success: true,
-      planId: plan.id,
-      data: params,
-      durationMs: Date.now() - startTime,
-      stepsExecuted,
-      totalSteps: plan.steps.length,
-    };
-  }
-
-  /**
-   * Attempt to find and run a recovery handler for a given condition.
+    return withRecovery({
+      success: true,
+      planId: plan.id,
+      data: params,
+      durationMs: Date.now() - startTime,
+      stepsExecuted,
+      totalSteps: plan.steps.length,
+    });
+  }
+
+  private async tryBoundedRecovery(
+    failedStep: CompiledStep,
+    errorText: string,
+    sessionId: string,
+    params: Record<string, unknown>,
+    options: PlanExecutionOptions,
+    recoveryAttempts: PlanRecoveryAttempt[],
+  ): Promise<{ recovered: boolean; stepsExecuted: number }> {
+    const config = options.boundedRecovery;
+    if (!config?.enabled) return { recovered: false, stepsExecuted: 0 };
+
+    const maxToolCalls = Math.max(0, config.maxToolCalls ?? 2);
+    if (countExecutedRecoveryAttempts(recoveryAttempts) >= maxToolCalls) {
+      return { recovered: false, stepsExecuted: 0 };
+    }
+
+    const candidates = rankRecoveryCandidates({
+      toolName: failedStep.tool,
+      resultText: errorText,
+      isError: true,
+      recentCalls: [{ toolName: failedStep.tool, result: 'error', error: errorText }],
+      maxCandidates: config.maxCandidates ?? 3,
+    });
+
+    let executed = 0;
+    for (const candidate of candidates) {
+      if (countExecutedRecoveryAttempts(recoveryAttempts) >= maxToolCalls) break;
+      if (candidate.risk !== 'read_only' || candidate.blockedReason) {
+        recoveryAttempts.push({
+          tool: candidate.tool,
+          status: 'blocked',
+          reason: candidate.blockedReason ?? `risk ${candidate.risk} is not allowed for bounded recovery`,
+        });
+        continue;
+      }
+
+      const handler = this.toolResolver(candidate.tool);
+      if (!handler) {
+        recoveryAttempts.push({ tool: candidate.tool, status: 'failed', reason: 'tool handler not found' });
+        continue;
+      }
+
+      const args = buildSafeRecoveryArgs(candidate.tool, params);
+      if (!args) {
+        recoveryAttempts.push({ tool: candidate.tool, status: 'blocked', reason: 'no safe argument template available' });
+        continue;
+      }
+
+      try {
+        const result = await withTimeout(
+          handler(sessionId, args),
+          config.perCandidateTimeoutMs ?? 5000,
+          `bounded recovery tool=${candidate.tool} for step=${failedStep.order}`,
+        );
+        executed++;
+        if (!result.isError && !isEmptyResult(result)) {
+          recoveryAttempts.push({ tool: candidate.tool, status: 'success', reason: candidate.reason });
+          return { recovered: true, stepsExecuted: executed };
+        }
+        recoveryAttempts.push({ tool: candidate.tool, status: 'failed', reason: 'candidate returned empty or error result' });
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        recoveryAttempts.push({
+          tool: candidate.tool,
+          status: /timed out/i.test(message) ? 'timeout' : 'failed',
+          reason: candidate.reason,
+          error: message,
+        });
+      }
+    }
+
+    return { recovered: false, stepsExecuted: executed };
+  }
+
+  /**
+   * Attempt to find and run a recovery handler for a given condition.
    * Returns updated stepsExecuted + params snapshot on success, null if no handler.
    */
   private async tryRecovery(
@@ -363,5 +464,20 @@ export class PlanExecutor {
     }
 
     return { stepsExecuted, params };
-  }
-}
+  }
+}
+
+function countExecutedRecoveryAttempts(attempts: PlanRecoveryAttempt[]): number {
+  return attempts.filter((attempt) => attempt.status !== 'blocked').length;
+}
+
+function buildSafeRecoveryArgs(tool: string, params: Record<string, unknown>): Record<string, unknown> | null {
+  const tabId = typeof params.tabId === 'string' ? params.tabId : undefined;
+  switch (tool) {
+    case 'read_page':
+    case 'tabs_context':
+      return tabId ? { tabId } : {};
+    default:
+      return null;
+  }
+}
diff --git a/src/tools/orchestration.ts b/src/tools/orchestration.ts
index 16923b33..0706af8b 100644
--- a/src/tools/orchestration.ts
+++ b/src/tools/orchestration.ts
@@ -666,6 +666,10 @@ const executePlanDefinition: MCPToolDefinition = {
         type: 'string',
         description: 'Tab ID to execute the plan against',
       },
+      boundedRecovery: {
+        type: 'boolean',
+        description: 'Opt-in: try a small, safety-gated read-only recovery search when a plan step fails',
+      },
       params: {
         type: 'object',
         description: 'Runtime params merged with plan defaults',
@@ -684,6 +688,7 @@ const executePlanHandler: ToolHandler = async (
   const planId = args.planId as string;
   const tabId = args.tabId as string;
   const runtimeParams = (args.params as Record<string, unknown>) || {};
+  const boundedRecovery = args.boundedRecovery === true;
 
   if (!planId || !tabId) {
     return {
@@ -749,7 +754,11 @@ const executePlanHandler: ToolHandler = async (
 
     // Execute the plan
     const mergedParams = { tabId, ...runtimeParams };
-    const result = await executor.execute(plan, sessionId, mergedParams);
+    const result = await executor.execute(plan, sessionId, mergedParams, {
+      boundedRecovery: boundedRecovery
+        ? { enabled: true, maxCandidates: 3, maxToolCalls: 2, perCandidateTimeoutMs: 5000 }
+        : undefined,
+    });
 
     // Update stats
     registry.updateStats(planId, result.success, result.durationMs);
@@ -765,6 +774,7 @@ const executePlanHandler: ToolHandler = async (
           durationMs: result.durationMs,
           data: result.data,
           error: result.error,
+          recovery: result.recovery,
           message: result.success
             ? `Plan "${planId}" executed successfully in ${result.durationMs}ms (${result.stepsExecuted}/${result.totalSteps} steps)`
             : `Plan "${planId}" failed: ${result.error}. Consider manual execution.`,
diff --git a/src/types/plan-cache.ts b/src/types/plan-cache.ts
index a52af178..7ba4046e 100644
--- a/src/types/plan-cache.ts
+++ b/src/types/plan-cache.ts
@@ -109,8 +109,31 @@ export interface PlanRegistryData {
   updatedAt: number;
 }
 
-/** Result of plan execution */
-export interface PlanExecutionResult {
+/** Bounded recovery telemetry for plan execution. */
+export interface PlanRecoveryAttempt {
+  tool: string;
+  status: 'success' | 'failed' | 'blocked' | 'timeout';
+  reason: string;
+  error?: string;
+}
+
+export interface PlanRecoverySummary {
+  enabled: boolean;
+  attempts: PlanRecoveryAttempt[];
+  exhausted: boolean;
+}
+
+export interface PlanExecutionOptions {
+  boundedRecovery?: {
+    enabled: boolean;
+    maxCandidates?: number;
+    maxToolCalls?: number;
+    perCandidateTimeoutMs?: number;
+  };
+}
+
+/** Result of plan execution */
+export interface PlanExecutionResult {
   /** Whether the plan executed successfully */
   success: boolean;
   /** Plan ID that was executed */
@@ -123,6 +146,8 @@ export interface PlanExecutionResult {
   durationMs: number;
   /** Number of steps executed */
   stepsExecuted: number;
-  /** Total steps in plan */
-  totalSteps: number;
-}
+  /** Total steps in plan */
+  totalSteps: number;
+  /** Optional bounded recovery summary when opt-in recovery is enabled. */
+  recovery?: PlanRecoverySummary;
+}
diff --git a/tests/orchestration/plan-executor-bounded-recovery.test.ts b/tests/orchestration/plan-executor-bounded-recovery.test.ts
new file mode 100644
index 00000000..cc98a57e
--- /dev/null
+++ b/tests/orchestration/plan-executor-bounded-recovery.test.ts
@@ -0,0 +1,72 @@
+import { PlanExecutor } from '../../src/orchestration/plan-executor';
+import type { CompiledPlan } from '../../src/types/plan-cache';
+import type { MCPResult, ToolHandler } from '../../src/types/mcp';
+
+const text = (value: string, isError = false): MCPResult => ({ content: [{ type: 'text', text: value }], isError });
+
+function plan(): CompiledPlan {
+  return {
+    id: 'test-plan',
+    version: '1',
+    description: 'test',
+    parameters: {},
+    steps: [
+      { order: 1, tool: 'interact', args: { ref: 'old', tabId: '${tabId}' }, timeout: 1000 },
+      { order: 2, tool: 'read_page', args: { tabId: '${tabId}' }, timeout: 1000, parseResult: { format: 'text', storeAs: 'page' } },
+    ],
+    errorHandlers: [],
+    successCriteria: { requiredFields: ['page'] },
+  };
+}
+
+describe('PlanExecutor bounded recovery', () => {
+  it('is disabled by default and preserves original failure', async () => {
+    const handlers = new Map<string, ToolHandler>([
+      ['interact', async () => text('ref is stale', true)],
+      ['read_page', async () => text('fresh page')],
+    ]);
+    const executor = new PlanExecutor((name) => handlers.get(name) ?? null);
+
+    const result = await executor.execute(plan(), 's1', { tabId: 'tab-1' });
+
+    expect(result.success).toBe(false);
+    expect(result.recovery).toBeUndefined();
+  });
+
+  it('tries bounded read-only recovery and continues on success', async () => {
+    const calls: string[] = [];
+    const handlers = new Map<string, ToolHandler>([
+      ['interact', async () => { calls.push('interact'); return text('ref is stale', true); }],
+      ['read_page', async () => { calls.push('read_page'); return text('fresh page with refs'); }],
+    ]);
+    const executor = new PlanExecutor((name) => handlers.get(name) ?? null);
+
+    const result = await executor.execute(plan(), 's1', { tabId: 'tab-1' }, {
+      boundedRecovery: { enabled: true, maxCandidates: 2, maxToolCalls: 1, perCandidateTimeoutMs: 1000 },
+    });
+
+    expect(result.success).toBe(true);
+    expect(calls).toEqual(['interact', 'read_page', 'read_page']);
+    expect(result.recovery?.attempts[0]).toMatchObject({ tool: 'read_page', status: 'success' });
+  });
+
+  it('blocks side-effect candidates and enforces max tool-call budget', async () => {
+    const handlers = new Map<string, ToolHandler>([
+      ['click', async () => text('CAPTCHA Access Denied Login page detected', true)],
+      ['read_page', async () => text('', false)],
+      ['tabs_context', async () => text('tab state')],
+    ]);
+    const captchaPlan = plan();
+    captchaPlan.steps[0] = { order: 1, tool: 'click', args: { tabId: '${tabId}' }, timeout: 1000 };
+    const executor = new PlanExecutor((name) => handlers.get(name) ?? null);
+
+    const result = await executor.execute(captchaPlan, 's1', { tabId: 'tab-1' }, {
+      boundedRecovery: { enabled: true, maxCandidates: 3, maxToolCalls: 1, perCandidateTimeoutMs: 1000 },
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.recovery?.attempts.length).toBe(1);
+    expect(result.recovery?.exhausted).toBe(true);
+    expect(result.recovery?.attempts[0].tool).toBe('read_page');
+  });
+});