shaun0927 · shaun0927 · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/src/failure/categories.ts b/src/failure/categories.ts
@@ -0,0 +1,32 @@
+/**
+ * Shared structured failure categories for OpenChrome runtime/tool failures.
+ *
+ * These categories are intentionally deterministic and dependency-free so they
+ * can be attached to tool responses, run events, evidence bundles, and future
+ * recovery policies without changing existing tool behavior.
+ *
+ * The array is declared `as const`, so the `FailureCategory` union at the
+ * bottom of this block is derived from it: adding or removing an entry here
+ * changes that type with no second list to keep in sync.
+ */
+export const FAILURE_CATEGORIES = [
+  'STALE_REF',
+  'ELEMENT_NOT_FOUND',
+  'NAVIGATION_TIMEOUT',
+  'TAB_UNHEALTHY',
+  'BROWSER_CRASH',
+  'CONNECTION_LOST',
+  'AUTH_REQUIRED',
+  'CAPTCHA_OR_WAF',
+  'NO_PROGRESS',
+  'MAX_STEPS_EXCEEDED',
+  'POSTCONDITION_FAILED',
+  'LLM_WANDERING',
+  'UNKNOWN', // Reported by the classifier when no rule matches.
+] as const;
+
+export type FailureCategory = typeof FAILURE_CATEGORIES[number]; // Union of the string literals listed above.
+
+export interface FailureClassification { // One classifier verdict: a category plus its score and explanation.
+  category: FailureCategory; // Category assigned to the failure.
+  /**
+   * 0..1 deterministic confidence score. The classifier orders its results by
+   * this value, highest first.
+   */
+  confidence: number;
+  /** Short human-readable explanation suitable for logs/metadata. */
+  reason: string;
+}
diff --git a/src/failure/classifier.ts b/src/failure/classifier.ts
@@ -0,0 +1,187 @@
+import type { FailureCategory, FailureClassification } from './categories.js';
+
+export interface ClassifyFailureInput { // Everything the classifier may look at; every field is optional.
+  /**
+   * Error object, string, or arbitrary thrown value. Error instances contribute
+   * `name: message`; other non-string values are JSON-serialized when possible.
+   */
+  error?: unknown;
+  /**
+   * Optional explicit message/result text when no Error object exists. When both
+   * are given, this text is appended to the error text rather than replacing it.
+   */
+  message?: string;
+  /** Tool that produced the failure, if known. Lowercased before rules compare it. */
+  toolName?: string;
+  /** HintEngine rule name, if classification is driven by a hint. */
+  hintRule?: string;
+  /** Current URL/title can add context for auth and WAF ambiguity. */
+  currentUrl?: string;
+  pageTitle?: string; // Page title; read together with currentUrl by the CAPTCHA/WAF and auth rules.
+  /** When true, return UNKNOWN if no pattern matches. Defaults to true. */
+  fallbackToUnknown?: boolean;
+}
+
+interface Rule { // One entry of the RULES table: a predicate plus the verdict it produces.
+  category: FailureCategory; // Category reported when test() returns true.
+  confidence: number; // Fixed score copied into the resulting classification.
+  reason: string; // Fixed explanation copied into the resulting classification.
+  test(input: NormalizedFailureInput): boolean; // Predicate over the normalized input.
+}
+
+interface NormalizedFailureInput { // Shape produced by normalize(); every field is a string, never undefined.
+  text: string; // Stringified error and message joined by a space, lowercased.
+  errorName: string; // Error `name` (or constructor name), lowercased; '' when unavailable.
+  toolName: string; // Lowercased tool name; '' when not supplied.
+  hintRule: string; // Passed through unchanged; '' when not supplied.
+  currentUrl: string; // Passed through unchanged; '' when not supplied.
+  pageTitle: string; // Passed through unchanged; '' when not supplied.
+}
+
+const AUTH_CONTEXT = /\b(log in|login|signin|sign in|auth|authentication|password|credential|permissions?|mfa|2fa|totp|session expired)\b/i; // Words that indicate a login/permission context.
+const AUTH_DIRECT = /\b(401|unauthorized|please sign in|session expired)\b/i; // Phrases the auth rule accepts on their own.
+const FORBIDDEN_SIGNAL = /\b(403|forbidden)\b/i; // 403/forbidden; the auth rule only uses it together with AUTH_CONTEXT.
+const WAF_CONTEXT = /\b(captcha|cloudflare|akamai|imperva|datadome|human verification|verify you are human|bot[- ]?check|anti[- ]?bot|ip block|request block|access denied|just a moment)\b/i; // CAPTCHA, WAF vendor, and block-page phrases.
+
+const RULES: Rule[] = [ // Every rule is evaluated; several may match, and results are ranked by confidence rather than by position here.
+  {
+    category: 'STALE_REF',
+    confidence: 0.95,
+    reason: 'Reference is stale or invalid after page changes',
+    test: ({ text }) => /\b(stale ref|invalid ref|ref\b.+not found|backendnodeid.+not found|node is detached|no node with given id)\b/i.test(text),
+  },
+  {
+    category: 'CONNECTION_LOST',
+    confidence: 0.95,
+    reason: 'CDP/browser transport connection was lost',
+    test: ({ text }) => /\b(not connected to chrome|call connect\(\) first|websocket.*closed|websocket is not open|browser has disconnected|browser disconnected|cdpsession connection closed|connection closed|session closed|protocol error.*(?:connection|disconnected)|puppeteer\.connect\(\) timed out|session initialization timed out)\b/i.test(text),
+  },
+  {
+    category: 'BROWSER_CRASH', // Also matches a TargetClosedError whose text mentions a crash or the browser.
+    confidence: 0.92,
+    reason: 'Browser process or renderer appears to have crashed',
+    test: ({ text, errorName }) => /\b(browser crash|browser process.*dead|chrome process.*dead|renderer process.*gone|crashed)\b/i.test(`${errorName} ${text}`) || (/targetclosederror/i.test(errorName) && /\b(crash|crashed|browser)\b/i.test(text)),
+  },
+  {
+    category: 'TAB_UNHEALTHY',
+    confidence: 0.9,
+    reason: 'Target tab is closed, missing, frozen, or unhealthy',
+    test: ({ text }) => /\b(tab.+not found|target.+not found|invalid tab|no such tab|page closed|target closed|tab health probe timeout|tab.+unhealthy|eviction threshold)\b/i.test(text),
+  },
+  {
+    category: 'NAVIGATION_TIMEOUT', // Any timeout wording from the "navigate" tool also counts.
+    confidence: 0.9,
+    reason: 'Navigation or page-load wait timed out',
+    test: ({ text, toolName }) => /\b(navigation timeout|page load timeout|waiting for navigation failed|net::err_timed_out|timeout.*navigation|timed out.*navigate|navigate.*timed out)\b/i.test(text) || (toolName === 'navigate' && /\b(timeout|timed out)\b/i.test(text)),
+  },
+  {
+    category: 'ELEMENT_NOT_FOUND',
+    confidence: 0.88,
+    reason: 'Requested selector/ref/semantic element could not be found',
+    test: ({ text }) => /\b(element not found|no elements? found|no matching element|selector.+not found|selector.+failed|queryselectorall.*(?:0|zero)|could not find (?:an? )?(?:element|selector|ref|button|link|input|field|node)|no good match found|no clickable elements found)\b/i.test(text),
+  },
+  {
+    category: 'CAPTCHA_OR_WAF', // Looks at the failure text plus the current URL and page title.
+    confidence: 0.86,
+    reason: 'Page indicates CAPTCHA, WAF, bot detection, or access-denied block',
+    test: (input) => {
+      const combined = `${input.text} ${input.currentUrl} ${input.pageTitle}`;
+      if (!WAF_CONTEXT.test(combined)) return false;
+      // Access denied is ambiguous. Treat it as auth only when auth context is present.
+      if (/access denied/i.test(combined) && AUTH_CONTEXT.test(combined)) return false; // NOTE(review): this also drops the WAF verdict when another WAF phrase (e.g. "captcha") is present alongside "access denied" — confirm that is intended.
+      return true;
+    },
+  },
+  {
+    category: 'AUTH_REQUIRED', // Looks at the failure text plus the current URL and page title.
+    confidence: 0.84,
+    reason: 'Page or failure indicates missing/expired authentication or credentials',
+    test: (input) => {
+      const combined = `${input.text} ${input.currentUrl} ${input.pageTitle}`;
+      return AUTH_CONTEXT.test(combined) || AUTH_DIRECT.test(combined) || (FORBIDDEN_SIGNAL.test(combined) && AUTH_CONTEXT.test(combined)); // NOTE(review): the FORBIDDEN_SIGNAL clause can never change the result; it is subsumed by the first AUTH_CONTEXT test.
+    },
+  },
+  {
+    category: 'NO_PROGRESS', // Matches against the hint rule name and the failure text together.
+    confidence: 0.82,
+    reason: 'Recent actions are stalling or made no meaningful progress',
+    test: ({ text, hintRule }) => /\b(progress-tracker-stuck|progress-tracker-stalling|no meaningful progress|stalling|stuck|same-tool-same-result|tool-oscillation|coordinate-click-stall)\b/i.test(`${hintRule} ${text}`),
+  },
+  {
+    category: 'LLM_WANDERING', // Shares some tokens with NO_PROGRESS (e.g. same-tool-same-result), so both may be reported together.
+    confidence: 0.78,
+    reason: 'Repeated low-value actions suggest agent wandering',
+    test: ({ text, hintRule }) => /\b(wandering|oscillation|coordinate-click-stall|screenshot-verification-loop|same-tool-same-result|multiple coordinate clicks|multiple screenshots|escalation ladder)\b/i.test(`${hintRule} ${text}`),
+  },
+  {
+    category: 'MAX_STEPS_EXCEEDED',
+    confidence: 0.9,
+    reason: 'Execution exceeded configured step or tool-call budget',
+    test: ({ text }) => /\b(max steps|max number of|maximum steps|step limit|max iterations|max tool calls|budget exceeded)\b/i.test(text),
+  },
+  {
+    category: 'POSTCONDITION_FAILED',
+    confidence: 0.9,
+    reason: 'Outcome contract or postcondition did not pass',
+    test: ({ text }) => /\b(postcondition(?:_| )violation|postcondition failed|success criteria not met|contract.+failed|assertion failed|oc_assert.+failed)\b/i.test(text),
+  },
+];
+
+/**
+ * Evaluates every rule against the normalized input and returns one
+ * classification per matching category, ordered by descending confidence
+ * (ties broken by category name).
+ *
+ * @param input Failure details; an omitted argument is treated as `{}`.
+ * @returns The ranked classifications. When nothing matches, a single UNKNOWN
+ *   entry is returned unless `input.fallbackToUnknown` is `false`, in which
+ *   case the array is empty.
+ */
+export function classifyFailure(input: ClassifyFailureInput = {}): FailureClassification[] {
+  const normalized = normalize(input);
+  const bestByCategory = new Map<FailureCategory, FailureClassification>();
+
+  RULES.filter((rule) => rule.test(normalized)).forEach(({ category, confidence, reason }) => {
+    const existing = bestByCategory.get(category);
+    // Keep the earlier entry unless this rule is strictly more confident.
+    if (existing && confidence <= existing.confidence) return;
+    bestByCategory.set(category, { category, confidence, reason });
+  });
+
+  const ranked = Array.from(bestByCategory.values());
+  ranked.sort((left, right) => {
+    const byConfidence = right.confidence - left.confidence;
+    return byConfidence !== 0 ? byConfidence : left.category.localeCompare(right.category);
+  });
+
+  const wantsFallback = input.fallbackToUnknown !== false;
+  if (ranked.length > 0 || !wantsFallback) return ranked;
+  return [{ category: 'UNKNOWN', confidence: 0.5, reason: 'No failure classifier rule matched' }];
+}
+
+/**
+ * Returns the highest-ranked classification for a failure.
+ *
+ * @param input Failure details; an omitted argument is treated as `{}`.
+ * @returns The first entry of `classifyFailure(input)`. That is `undefined`
+ *   only when `input.fallbackToUnknown` is `false` and no rule matched;
+ *   otherwise an UNKNOWN classification stands in for "no match".
+ */
+export function primaryFailureCategory(input: ClassifyFailureInput & { fallbackToUnknown: false }): FailureClassification | undefined;
+export function primaryFailureCategory(input?: ClassifyFailureInput): FailureClassification;
+export function primaryFailureCategory(input: ClassifyFailureInput = {}): FailureClassification | undefined {
+  // classifyFailure() already supplies the UNKNOWN entry whenever fallback is
+  // enabled, so its first element is the answer in every case; a second
+  // UNKNOWN literal here could never be reached.
+  const [classification] = classifyFailure(input);
+  return classification;
+}
+
+/**
+ * Converts caller input into the all-string shape the rules test against.
+ *
+ * The stringified error and the explicit message are joined with a space
+ * (empty parts are skipped) and lowercased; the error type name and tool name
+ * are lowercased; the remaining fields pass through with '' as the default.
+ */
+function normalize(input: ClassifyFailureInput): NormalizedFailureInput {
+  const typeName = errorTypeName(input.error);
+
+  const pieces: string[] = [];
+  const errorText = stringifyError(input.error);
+  if (errorText) pieces.push(errorText);
+  if (input.message) pieces.push(input.message);
+
+  const orEmpty = (value: string | undefined): string => value ?? '';
+  return {
+    text: pieces.join(' ').toLowerCase(),
+    errorName: typeName.toLowerCase(),
+    toolName: orEmpty(input.toolName).toLowerCase(),
+    hintRule: orEmpty(input.hintRule),
+    currentUrl: orEmpty(input.currentUrl),
+    pageTitle: orEmpty(input.pageTitle),
+  };
+}
+
+/**
+ * Best-effort type name for a thrown value.
+ *
+ * @returns '' for anything that is not a non-null object; otherwise the
+ *   value's `name` when that is a string, else its constructor's name, else ''.
+ */
+function errorTypeName(error: unknown): string {
+  if (typeof error !== 'object' || error === null) return '';
+  const candidate = error as { name?: unknown; constructor?: { name?: string } };
+  const ctorName = candidate.constructor?.name;
+  const ownName = candidate.name;
+  if (typeof ownName === 'string') return ownName;
+  return ctorName ?? '';
+}
+
+/**
+ * Renders an arbitrary thrown value as text for the rules to match against.
+ *
+ * @returns '' for null/undefined, the value itself for strings,
+ *   `name: message` for Error instances, and otherwise the JSON form when one
+ *   exists, falling back to `String(error)`. Always a string; never throws.
+ */
+function stringifyError(error: unknown): string {
+  if (error === undefined || error === null) return '';
+  if (typeof error === 'string') return error;
+  if (error instanceof Error) return `${error.name}: ${error.message}`;
+  try {
+    // JSON.stringify yields undefined (not a string) for functions and
+    // symbols, so only accept its result when it really is a string.
+    const json: unknown = JSON.stringify(error);
+    if (typeof json === 'string') return json;
+  } catch {
+    // Circular structures and BigInt values make JSON.stringify throw;
+    // fall through to the generic conversion below.
+  }
+  try {
+    return String(error);
+  } catch {
+    // Objects without a usable toString (e.g. null-prototype) cannot be
+    // coerced; classification must not fail because of that.
+    return '';
+  }
+}
diff --git a/src/failure/index.ts b/src/failure/index.ts
@@ -0,0 +1,2 @@
+export * from './categories.js';
+export * from './classifier.js';
diff --git a/tests/failure/classifier.test.ts b/tests/failure/classifier.test.ts
@@ -0,0 +1,93 @@
+import { classifyFailure, primaryFailureCategory } from '../../src/failure';
+
+/** Classifies `message` (merged with any extra input fields) and returns just the category names, in ranked order. */
+function categories(message: string, extra: Parameters<typeof classifyFailure>[0] = {}) {
+  const results = classifyFailure({ message, ...extra });
+  return results.map((entry) => entry.category);
+}
+
+describe('failure classifier', () => { // Pins the category outcome for representative failure messages.
+  it('classifies stale refs', () => {
+    expect(primaryFailureCategory({ message: 'Error: stale ref abc is no longer available' }).category).toBe('STALE_REF');
+  });
+
+  it('classifies missing elements', () => {
+    expect(categories('selector failed: no matching element found')).toContain('ELEMENT_NOT_FOUND');
+  });
+
+  it('classifies navigation timeouts', () => {
+    expect(primaryFailureCategory({ toolName: 'navigate', message: 'Navigation timeout of 30000 ms exceeded' }).category).toBe('NAVIGATION_TIMEOUT');
+  });
+
+  it('classifies tab and target failures', () => {
+    expect(categories('invalid tab: no such tab')).toContain('TAB_UNHEALTHY');
+    expect(categories('CDPSession connection closed')).toContain('CONNECTION_LOST');
+  });
+
+  it('classifies browser crashes', () => {
+    const error = new Error('Target closed because the browser crash closed the renderer');
+    error.name = 'TargetClosedError';
+    expect(categories('', { error })).toContain('BROWSER_CRASH'); // Empty message: only the Error's name and message feed the classifier.
+  });
+
+  it('classifies auth-required access denied separately from WAF access denied', () => {
+    expect(primaryFailureCategory({ message: 'Access denied: login session expired, please sign in' }).category).toBe('AUTH_REQUIRED'); // Auth wording is present, so "access denied" is not counted as a WAF block.
+    expect(primaryFailureCategory({ message: 'Access Denied reference from Akamai bot block' }).category).toBe('CAPTCHA_OR_WAF');
+  });
+
+  it('does not treat bare forbidden responses as auth-required', () => {
+    expect(categories('403 Forbidden')).not.toContain('AUTH_REQUIRED');
+    expect(primaryFailureCategory({ message: '403 Forbidden', fallbackToUnknown: false })).toBeUndefined(); // With the fallback disabled, "no match" surfaces as undefined instead of UNKNOWN.
+    expect(categories('Forbidden: login session expired')).toContain('AUTH_REQUIRED');
+  });
+
+  it('does not classify generic 403 forbidden server errors as auth-required', () => {
+    expect(categories('HTTP 403 Forbidden server error')).not.toContain('AUTH_REQUIRED');
+  });
+
+  it('classifies forbidden permission and auth contexts as auth-required', () => {
+    expect(categories('Forbidden: missing permission to access this tool')).toContain('AUTH_REQUIRED');
+    expect(categories('403 Forbidden: authentication credentials are required')).toContain('AUTH_REQUIRED');
+  });
+
+  it('classifies CAPTCHA and WAF blockers', () => {
+    expect(categories('Cloudflare says verify you are human captcha detected')).toContain('CAPTCHA_OR_WAF');
+  });
+
+  it('maps progress tracker stuck hints to no progress and wandering', () => {
+    const result = classifyFailure({ hintRule: 'progress-tracker-stuck', message: 'STOP — no meaningful progress, screenshot-verification-loop' });
+    expect(result.map((r) => r.category)).toEqual(expect.arrayContaining(['NO_PROGRESS', 'LLM_WANDERING'])); // One failure may carry several categories at once.
+  });
+
+  it('classifies step budget and postcondition failures', () => {
+    expect(categories('Reached the max number of 10 steps')).toContain('MAX_STEPS_EXCEEDED');
+    expect(categories('postcondition_violation: oc_assert failed')).toContain('POSTCONDITION_FAILED');
+  });
+
+  it('falls back to UNKNOWN by default and can suppress fallback', () => {
+    expect(classifyFailure({ message: 'some unrecognized failure' })).toEqual([
+      { category: 'UNKNOWN', confidence: 0.5, reason: 'No failure classifier rule matched' },
+    ]);
+    expect(classifyFailure({ message: 'some unrecognized failure', fallbackToUnknown: false })).toEqual([]);
+  });
+
+  it('classifies protocol errors for missing DOM nodes as stale references, not connection loss', () => {
+    const result = primaryFailureCategory({
+      error: new Error('Protocol error (DOM.resolveNode): No node with given id found'),
+      toolName: 'click',
+    });
+
+    expect(result.category).toBe('STALE_REF');
+  });
+
+  it('does not classify navigation context churn as connection loss', () => {
+    expect(categories('Execution context was destroyed, most likely because of a navigation')).not.toContain('CONNECTION_LOST');
+    expect(categories('Cannot find context with specified id')).not.toContain('CONNECTION_LOST');
+    expect(categories('Inspected target navigated or closed')).not.toContain('CONNECTION_LOST');
+    expect(categories('Protocol error (Runtime.callFunctionOn): Inspected target navigated or closed')).not.toContain('CONNECTION_LOST');
+  });
+
+  it('keeps generic could-not-find runtime failures out of element-not-found', () => {
+    expect(categories('Could not find expected browser (chrome) locally')).not.toContain('ELEMENT_NOT_FOUND'); // "browser" is not one of the element-like nouns the rule accepts after "could not find".
+    expect(categories('Could not find element for selector .submit')).toContain('ELEMENT_NOT_FOUND');
+  });
+
+});
diff --git a/tests/tools/console-capture-regression.test.ts b/tests/tools/console-capture-regression.test.ts
@@ -3,8 +3,9 @@
  * Regression fixture test for console_capture tool (#897).
  *
  * Verifies that for a frozen 100-entry input (cap not hit), the `get` response
- * fields excluding `bufferStats` are byte-identical to the v1.11.0 baseline
- * captured at tests/fixtures/console-capture/baseline-v1.11.0.json.
+ * fields excluding `bufferStats` match the v1.11.0 baseline captured at
+ * tests/fixtures/console-capture/baseline-v1.11.0.json. Fixture newlines are
+ * normalized because Windows checkouts may convert LF to CRLF.
  *
  * This test protects against future regressions, not against this PR's own changes.
  * The fixture was captured from the post-change code with a 100-log input.
@@ -132,7 +133,7 @@ const FIXTURE_PATH = path.join(
 );
 
 describe('console_capture get response — v1.11.0 baseline regression', () => {
-  test('response shape (excluding bufferStats) matches baseline fixture byte-for-byte', () => {
+  test('response shape (excluding bufferStats) matches baseline fixture', () => {
     const frozenLogs = buildFrozenLogs();
     const response = buildGetResponse(frozenLogs);
     const responseJson = JSON.stringify(response, null, 2);
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		export * from './categories.js';
		export * from './classifier.js';