Skip to content
Merged
32 changes: 32 additions & 0 deletions src/failure/categories.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Shared structured failure categories for OpenChrome runtime/tool failures.
*
* These categories are intentionally deterministic and dependency-free so they
* can be attached to tool responses, run events, evidence bundles, and future
* recovery policies without changing existing tool behavior.
*
* The tuple is declared `as const` so its string literals are preserved and a
* union type can be derived from it with `typeof FAILURE_CATEGORIES[number]`.
*/
export const FAILURE_CATEGORIES = [
'STALE_REF',
'ELEMENT_NOT_FOUND',
'NAVIGATION_TIMEOUT',
'TAB_UNHEALTHY',
'BROWSER_CRASH',
'CONNECTION_LOST',
'AUTH_REQUIRED',
'CAPTCHA_OR_WAF',
'NO_PROGRESS',
'MAX_STEPS_EXCEEDED',
'POSTCONDITION_FAILED',
'LLM_WANDERING',
// Catch-all reported by the classifier when no rule matched.
'UNKNOWN',
] as const;

/** Union of the string literals listed in FAILURE_CATEGORIES. */
export type FailureCategory = typeof FAILURE_CATEGORIES[number];

/**
* One classifier verdict: a category together with its score and explanation.
*/
export interface FailureClassification {
/** Category this verdict assigns to the failure. */
category: FailureCategory;
/** 0..1 deterministic confidence score. */
confidence: number;
/** Short human-readable explanation suitable for logs/metadata. */
reason: string;
}
187 changes: 187 additions & 0 deletions src/failure/classifier.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import type { FailureCategory, FailureClassification } from './categories.js';

/**
* Everything a caller can supply about a failure. Every field is optional.
*/
export interface ClassifyFailureInput {
/** Error object, string, or arbitrary thrown value. */
error?: unknown;
/** Optional explicit message/result text when no Error object exists. */
message?: string;
/** Tool that produced the failure, if known. */
toolName?: string;
/** HintEngine rule name, if classification is driven by a hint. */
hintRule?: string;
/** Current URL/title can add context for auth and WAF ambiguity. */
currentUrl?: string;
/** Page title; matched together with the failure text and `currentUrl` by the auth and CAPTCHA/WAF rules. */
pageTitle?: string;
/** When true, return UNKNOWN if no pattern matches. Defaults to true; only an explicit `false` disables the fallback. */
fallbackToUnknown?: boolean;
}

/**
* One entry of the RULES table: the classification to report and the predicate that triggers it.
*/
interface Rule {
/** Category reported when `test` returns true. */
category: FailureCategory;
/** Score copied into the resulting classification and used to rank results. */
confidence: number;
/** Explanation copied into the resulting classification. */
reason: string;
/** Returns true when the normalized failure matches this rule. */
test(input: NormalizedFailureInput): boolean;
}

/**
* Failure input as produced by `normalize`: every field is a string, with '' standing in for missing values.
*/
interface NormalizedFailureInput {
/** Lower-cased, space-joined stringified error and explicit message. */
text: string;
/** Lower-cased error `name` (or constructor name); '' when the error is not an object. */
errorName: string;
/** Lower-cased tool name. */
toolName: string;
/** Hint rule name, case preserved. */
hintRule: string;
/** Current URL, case preserved. */
currentUrl: string;
/** Page title, case preserved. */
pageTitle: string;
}

/**
 * Words that put a failure into an authentication/authorization context.
 * Accepts "login", "log in" and "log-in" (likewise for "sign in"), and both the
 * singular and plural of "credential" / "permission"; a trailing `\b` would
 * otherwise reject the very common plural "credentials".
 */
const AUTH_CONTEXT = /\b(log[- ]?in|sign[- ]?in|auth|authentication|password|credentials?|permissions?|mfa|2fa|totp|session expired)\b/i;
/** Phrases that indicate missing/expired authentication on their own. */
const AUTH_DIRECT = /\b(401|unauthorized|please sign in|session expired)\b/i;
/** HTTP 403 / "forbidden"; ambiguous alone, so it is only consulted together with AUTH_CONTEXT. */
const FORBIDDEN_SIGNAL = /\b(403|forbidden)\b/i;
/** Vendor names and phrases typical of CAPTCHA, WAF, and bot-detection interstitials. */
const WAF_CONTEXT = /\b(captcha|cloudflare|akamai|imperva|datadome|human verification|verify you are human|bot[- ]?check|anti[- ]?bot|ip block|request block|access denied|just a moment)\b/i;

const RULES: Rule[] = [
{
  // Element refs / node ids that no longer resolve.
  category: 'STALE_REF',
  confidence: 0.95,
  reason: 'Reference is stale or invalid after page changes',
  test(failure) {
    const stalePattern = /\b(stale ref|invalid ref|ref\b.+not found|backendnodeid.+not found|node is detached|no node with given id)\b/i;
    return stalePattern.test(failure.text);
  },
},
{
category: 'CONNECTION_LOST',
confidence: 0.95,
reason: 'CDP/browser transport connection was lost',
// `session closed` counts as transport loss only when it is NOT followed by
// Puppeteer's tab-closure wording ("Session closed. Most likely the page has
// been closed."), which means the target died while the browser connection
// may still be healthy.
test: ({ text }) => /\b(not connected to chrome|call connect\(\) first|websocket.*closed|websocket is not open|browser has disconnected|browser disconnected|cdpsession connection closed|connection closed|session closed(?!\W+most likely the \w+ has been closed)|protocol error.*(?:connection|disconnected)|puppeteer\.connect\(\) timed out|session initialization timed out)\b/i.test(text),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Remove generic session closed from connection-loss matcher

The CONNECTION_LOST regex treats any session closed text as a transport disconnect, but Puppeteer frequently emits messages like Protocol error (...): Session closed. Most likely the page has been closed when only the target tab dies. In that case this rule wins with higher confidence than TAB_UNHEALTHY, so primaryFailureCategory reports a browser connection outage and can trigger reconnect/restart logic instead of tab-level recovery.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Exclude tab-closure errors from CONNECTION_LOST

The CONNECTION_LOST rule currently matches any error containing session closed, which captures common Puppeteer tab-close failures like Protocol error (...): Session closed. Most likely the page has been closed. even when the browser transport is still healthy. Because TAB_UNHEALTHY does not match that phrasing and has lower confidence anyway, primaryFailureCategory will mislabel tab-level failures as connection loss, which will drive the wrong remediation path (reconnect browser instead of recovering/replacing the tab).

Useful? React with 👍 / 👎.

},
{
  category: 'BROWSER_CRASH',
  confidence: 0.92,
  reason: 'Browser process or renderer appears to have crashed',
  test(failure) {
    const { errorName, text } = failure;
    // Direct crash wording anywhere in the error name or the message.
    if (/\b(browser crash|browser process.*dead|chrome process.*dead|renderer process.*gone|crashed)\b/i.test(`${errorName} ${text}`)) {
      return true;
    }
    // A TargetClosedError only counts when the message also mentions a crash or the browser.
    return /targetclosederror/i.test(errorName) && /\b(crash|crashed|browser)\b/i.test(text);
  },
},
{
  // Tab/target-level failures. `page has been closed` covers Puppeteer's
  // "Session closed. Most likely the page has been closed." message, which
  // the shorter `page closed` alternative does not match.
  category: 'TAB_UNHEALTHY',
  confidence: 0.9,
  reason: 'Target tab is closed, missing, frozen, or unhealthy',
  test: ({ text }) => /\b(tab.+not found|target.+not found|invalid tab|no such tab|page closed|page has been closed|target closed|tab health probe timeout|tab.+unhealthy|eviction threshold)\b/i.test(text),
},
{
  category: 'NAVIGATION_TIMEOUT',
  confidence: 0.9,
  reason: 'Navigation or page-load wait timed out',
  test(failure) {
    const explicitNavTimeout = /\b(navigation timeout|page load timeout|waiting for navigation failed|net::err_timed_out|timeout.*navigation|timed out.*navigate|navigate.*timed out)\b/i;
    if (explicitNavTimeout.test(failure.text)) return true;
    // Any timeout raised by the navigate tool itself is treated as a navigation timeout.
    if (failure.toolName !== 'navigate') return false;
    return /\b(timeout|timed out)\b/i.test(failure.text);
  },
},
{
category: 'ELEMENT_NOT_FOUND',
confidence: 0.88,
reason: 'Requested selector/ref/semantic element could not be found',
// The querySelectorAll alternative requires a standalone `0`/`zero` token
// (`\b` on both sides) so successful counts such as 10 or 20 do not match.
test: ({ text }) => /\b(element not found|no elements? found|no matching element|selector.+not found|selector.+failed|queryselectorall.*\b(?:0|zero)|could not find (?:an? )?(?:element|selector|ref|button|link|input|field|node)|no good match found|no clickable elements found)\b/i.test(text),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Require zero-only match for querySelectorAll not-found errors

Tighten the ELEMENT_NOT_FOUND regex so queryselectorall only matches explicit zero results. As written, queryselectorall.*(?:0|zero) also matches successful counts like 10/20, which will misclassify valid element discovery as ELEMENT_NOT_FOUND and can trigger the wrong recovery path when routing on the primary category.

Useful? React with 👍 / 👎.

},
{
  category: 'CAPTCHA_OR_WAF',
  confidence: 0.86,
  reason: 'Page indicates CAPTCHA, WAF, bot detection, or access-denied block',
  test(failure) {
    const haystack = `${failure.text} ${failure.currentUrl} ${failure.pageTitle}`;
    const looksLikeWaf = WAF_CONTEXT.test(haystack);
    // "Access denied" is ambiguous: when auth wording is also present, leave it to the auth rule.
    const authFlavoredDenial = /access denied/i.test(haystack) && AUTH_CONTEXT.test(haystack);
    return looksLikeWaf && !authFlavoredDenial;
  },
},
{
  category: 'AUTH_REQUIRED',
  confidence: 0.84,
  reason: 'Page or failure indicates missing/expired authentication or credentials',
  test(failure) {
    const haystack = [failure.text, failure.currentUrl, failure.pageTitle].join(' ');
    if (AUTH_CONTEXT.test(haystack)) return true;
    if (AUTH_DIRECT.test(haystack)) return true;
    // A bare 403/forbidden is not enough; it must be accompanied by auth wording.
    return FORBIDDEN_SIGNAL.test(haystack) && AUTH_CONTEXT.test(haystack);
  },
},
{
  category: 'NO_PROGRESS',
  confidence: 0.82,
  reason: 'Recent actions are stalling or made no meaningful progress',
  test(failure) {
    // The hint rule name is matched alongside the failure text.
    const subject = `${failure.hintRule} ${failure.text}`;
    return /\b(progress-tracker-stuck|progress-tracker-stalling|no meaningful progress|stalling|stuck|same-tool-same-result|tool-oscillation|coordinate-click-stall)\b/i.test(subject);
  },
},
{
  category: 'LLM_WANDERING',
  confidence: 0.78,
  reason: 'Repeated low-value actions suggest agent wandering',
  test(failure) {
    // The hint rule name is matched alongside the failure text.
    const subject = `${failure.hintRule} ${failure.text}`;
    return /\b(wandering|oscillation|coordinate-click-stall|screenshot-verification-loop|same-tool-same-result|multiple coordinate clicks|multiple screenshots|escalation ladder)\b/i.test(subject);
  },
},
{
  category: 'MAX_STEPS_EXCEEDED',
  confidence: 0.9,
  reason: 'Execution exceeded configured step or tool-call budget',
  test(failure) {
    const budgetPattern = /\b(max steps|max number of|maximum steps|step limit|max iterations|max tool calls|budget exceeded)\b/i;
    return budgetPattern.test(failure.text);
  },
},
{
  category: 'POSTCONDITION_FAILED',
  confidence: 0.9,
  reason: 'Outcome contract or postcondition did not pass',
  test(failure) {
    const contractPattern = /\b(postcondition(?:_| )violation|postcondition failed|success criteria not met|contract.+failed|assertion failed|oc_assert.+failed)\b/i;
    return contractPattern.test(failure.text);
  },
},
];

/**
 * Evaluates every rule in RULES against the normalized failure and returns all
 * matching classifications, highest confidence first (ties broken by category
 * name).
 *
 * @param input - Failure details; every field is optional.
 * @returns The matching classifications. When nothing matches, a single
 *   UNKNOWN entry is returned unless `fallbackToUnknown` is exactly `false`,
 *   in which case the array is empty.
 */
export function classifyFailure(input: ClassifyFailureInput = {}): FailureClassification[] {
  const failure = normalize(input);
  const bestByCategory = new Map<FailureCategory, FailureClassification>();

  RULES.filter((rule) => rule.test(failure)).forEach(({ category, confidence, reason }) => {
    const existing = bestByCategory.get(category);
    // Keep the earlier entry unless this rule is strictly more confident.
    if (existing && !(confidence > existing.confidence)) return;
    bestByCategory.set(category, { category, confidence, reason });
  });

  const ranked = Array.from(bestByCategory.values());
  ranked.sort((left, right) => {
    const byConfidence = right.confidence - left.confidence;
    if (byConfidence) return byConfidence;
    return left.category.localeCompare(right.category);
  });

  const wantsFallback = input.fallbackToUnknown !== false;
  if (ranked.length > 0 || !wantsFallback) return ranked;
  return [{ category: 'UNKNOWN', confidence: 0.5, reason: 'No failure classifier rule matched' }];
}

/**
 * Returns the single highest-ranked classification for a failure.
 *
 * Overloads, most specific first:
 * 1. `fallbackToUnknown: false` is known statically: the result may be `undefined`.
 * 2. `fallbackToUnknown` is omitted or statically `true` (including a call with
 *    no argument): a classification is always returned, UNKNOWN at worst.
 * 3. Any other `ClassifyFailureInput`, e.g. a value whose `fallbackToUnknown`
 *    is only known to be `boolean`: the result may be `undefined`, because the
 *    fallback can be disabled at runtime.
 */
export function primaryFailureCategory(input: ClassifyFailureInput & { fallbackToUnknown: false }): FailureClassification | undefined;
export function primaryFailureCategory(input?: ClassifyFailureInput & { fallbackToUnknown?: true }): FailureClassification;
export function primaryFailureCategory(input: ClassifyFailureInput): FailureClassification | undefined;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Return undefined in overload when fallback may be false

Update the general overload to allow undefined because this function can return undefined whenever fallbackToUnknown is false at runtime, even if that value is not a literal false in the call site. As written, callers passing a computed ClassifyFailureInput get the FailureClassification return type and may dereference .category unsafely, causing runtime crashes when no rule matches.

Useful? React with 👍 / 👎.

export function primaryFailureCategory(input: ClassifyFailureInput = {}): FailureClassification | undefined {
Comment on lines +148 to +149
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Return optional type when unknown fallback is disabled

The overloads advertise that primaryFailureCategory returns a FailureClassification for general ClassifyFailureInput, but the implementation returns undefined when fallbackToUnknown is false and no rule matches. This is a type-safety hole for callers passing a value typed as ClassifyFailureInput (not literal false), because TypeScript will permit direct .category access that can fail at runtime.

Useful? React with 👍 / 👎.

// classifyFailure returns its results sorted by descending confidence, so the first entry is the primary one.
const [classification] = classifyFailure(input);
// Return the match, or `undefined` when the caller explicitly disabled the UNKNOWN fallback.
if (classification || input.fallbackToUnknown === false) return classification;
// Defensive fallback for an empty result list while the fallback is still enabled.
return { category: 'UNKNOWN', confidence: 0.5, reason: 'No failure classifier rule matched' };
}

/**
 * Flattens caller input into plain strings for the rule predicates.
 *
 * The stringified error and the explicit message are joined with a space and
 * lower-cased into `text`; the error name and tool name are lower-cased; the
 * hint rule, URL and title are passed through, with '' for missing values.
 */
function normalize(input: ClassifyFailureInput): NormalizedFailureInput {
  const { error, message, toolName, hintRule, currentUrl, pageTitle } = input;
  const rawErrorName = errorTypeName(error);
  const pieces = [stringifyError(error), message];
  // Drop empty/undefined pieces so the joined text has no stray separators.
  const text = pieces.filter(Boolean).join(' ').toLowerCase();

  return {
    text,
    errorName: rawErrorName.toLowerCase(),
    toolName: (toolName ?? '').toLowerCase(),
    hintRule: hintRule ?? '',
    currentUrl: currentUrl ?? '',
    pageTitle: pageTitle ?? '',
  };
}

/**
 * Best-effort type name for a thrown value.
 *
 * @param error - Any thrown value.
 * @returns The value's own `name` when it is a string, otherwise its
 *   constructor's name when that is a string, otherwise ''. Non-object values
 *   always yield ''.
 */
function errorTypeName(error: unknown): string {
  if (!error || typeof error !== 'object') return '';
  const { name } = error as { name?: unknown };
  if (typeof name === 'string') return name;
  // Arbitrary objects may carry a non-string `constructor.name`; never let it
  // escape through the `string` return type, since callers call string methods on it.
  const ctorName = (error as { constructor?: { name?: unknown } | null }).constructor?.name;
  return typeof ctorName === 'string' ? ctorName : '';
}

/**
 * Converts any thrown value into text for pattern matching. Always returns a string.
 *
 * @param error - Any thrown value.
 * @returns '' for null/undefined, the string itself for strings,
 *   `"<name>: <message>"` for Error instances, the JSON form for values JSON
 *   can serialize, and a `String(...)` rendering for everything else.
 */
function stringifyError(error: unknown): string {
  if (error === undefined || error === null) return '';
  if (typeof error === 'string') return error;
  if (error instanceof Error) return `${error.name}: ${error.message}`;
  try {
    // JSON.stringify returns `undefined` (not a string) for functions and symbols.
    const json: string | undefined = JSON.stringify(error);
    if (typeof json === 'string') return json;
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw; fall through.
  }
  try {
    return String(error);
  } catch {
    // String() throws for objects that cannot be converted to a primitive.
    return Object.prototype.toString.call(error);
  }
}
2 changes: 2 additions & 0 deletions src/failure/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export * from './categories.js';
export * from './classifier.js';
93 changes: 93 additions & 0 deletions tests/failure/classifier.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { classifyFailure, primaryFailureCategory } from '../../src/failure';

/** Test helper: classifies `message` (plus any extra input fields) and returns only the category names. */
function categories(message: string, extra: Parameters<typeof classifyFailure>[0] = {}) {
  const classifications = classifyFailure({ message, ...extra });
  return classifications.map(({ category }) => category);
}

// Behavioral suite for classifyFailure / primaryFailureCategory: one case per
// category plus regression cases for ambiguous wording.
describe('failure classifier', () => {
it('classifies stale refs', () => {
expect(primaryFailureCategory({ message: 'Error: stale ref abc is no longer available' }).category).toBe('STALE_REF');
});

it('classifies missing elements', () => {
expect(categories('selector failed: no matching element found')).toContain('ELEMENT_NOT_FOUND');
});

it('classifies navigation timeouts', () => {
expect(primaryFailureCategory({ toolName: 'navigate', message: 'Navigation timeout of 30000 ms exceeded' }).category).toBe('NAVIGATION_TIMEOUT');
});

it('classifies tab and target failures', () => {
expect(categories('invalid tab: no such tab')).toContain('TAB_UNHEALTHY');
expect(categories('CDPSession connection closed')).toContain('CONNECTION_LOST');
});

it('classifies browser crashes', () => {
const error = new Error('Target closed because the browser crash closed the renderer');
// The error name is set explicitly so the classifier sees a TargetClosedError.
error.name = 'TargetClosedError';
expect(categories('', { error })).toContain('BROWSER_CRASH');
});

// "Access denied" is ambiguous: auth wording routes it to AUTH_REQUIRED, WAF vendor wording to CAPTCHA_OR_WAF.
it('classifies auth-required access denied separately from WAF access denied', () => {
expect(primaryFailureCategory({ message: 'Access denied: login session expired, please sign in' }).category).toBe('AUTH_REQUIRED');
expect(primaryFailureCategory({ message: 'Access Denied reference from Akamai bot block' }).category).toBe('CAPTCHA_OR_WAF');
});

it('does not treat bare forbidden responses as auth-required', () => {
expect(categories('403 Forbidden')).not.toContain('AUTH_REQUIRED');
// With the UNKNOWN fallback disabled, an unmatched failure yields undefined.
expect(primaryFailureCategory({ message: '403 Forbidden', fallbackToUnknown: false })).toBeUndefined();
expect(categories('Forbidden: login session expired')).toContain('AUTH_REQUIRED');
});

it('does not classify generic 403 forbidden server errors as auth-required', () => {
expect(categories('HTTP 403 Forbidden server error')).not.toContain('AUTH_REQUIRED');
});

it('classifies forbidden permission and auth contexts as auth-required', () => {
expect(categories('Forbidden: missing permission to access this tool')).toContain('AUTH_REQUIRED');
expect(categories('403 Forbidden: authentication credentials are required')).toContain('AUTH_REQUIRED');
});

it('classifies CAPTCHA and WAF blockers', () => {
expect(categories('Cloudflare says verify you are human captcha detected')).toContain('CAPTCHA_OR_WAF');
});

// A single failure may legitimately produce several categories at once.
it('maps progress tracker stuck hints to no progress and wandering', () => {
const result = classifyFailure({ hintRule: 'progress-tracker-stuck', message: 'STOP — no meaningful progress, screenshot-verification-loop' });
expect(result.map((r) => r.category)).toEqual(expect.arrayContaining(['NO_PROGRESS', 'LLM_WANDERING']));
});

it('classifies step budget and postcondition failures', () => {
expect(categories('Reached the max number of 10 steps')).toContain('MAX_STEPS_EXCEEDED');
expect(categories('postcondition_violation: oc_assert failed')).toContain('POSTCONDITION_FAILED');
});

it('falls back to UNKNOWN by default and can suppress fallback', () => {
expect(classifyFailure({ message: 'some unrecognized failure' })).toEqual([
{ category: 'UNKNOWN', confidence: 0.5, reason: 'No failure classifier rule matched' },
]);
expect(classifyFailure({ message: 'some unrecognized failure', fallbackToUnknown: false })).toEqual([]);
});

it('classifies protocol errors for missing DOM nodes as stale references, not connection loss', () => {
const result = primaryFailureCategory({
error: new Error('Protocol error (DOM.resolveNode): No node with given id found'),
toolName: 'click',
});

expect(result.category).toBe('STALE_REF');
});

// Context-destroyed / target-navigated messages must not be mistaken for a lost connection.
it('does not classify navigation context churn as connection loss', () => {
expect(categories('Execution context was destroyed, most likely because of a navigation')).not.toContain('CONNECTION_LOST');
expect(categories('Cannot find context with specified id')).not.toContain('CONNECTION_LOST');
expect(categories('Inspected target navigated or closed')).not.toContain('CONNECTION_LOST');
expect(categories('Protocol error (Runtime.callFunctionOn): Inspected target navigated or closed')).not.toContain('CONNECTION_LOST');
});

// "Could not find" only counts as ELEMENT_NOT_FOUND when followed by an element-like noun.
it('keeps generic could-not-find runtime failures out of element-not-found', () => {
expect(categories('Could not find expected browser (chrome) locally')).not.toContain('ELEMENT_NOT_FOUND');
expect(categories('Could not find element for selector .submit')).toContain('ELEMENT_NOT_FOUND');
});

});
7 changes: 4 additions & 3 deletions tests/tools/console-capture-regression.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
* Regression fixture test for console_capture tool (#897).
*
* Verifies that for a frozen 100-entry input (cap not hit), the `get` response
* fields excluding `bufferStats` are byte-identical to the v1.11.0 baseline
* captured at tests/fixtures/console-capture/baseline-v1.11.0.json.
* fields excluding `bufferStats` match the v1.11.0 baseline captured at
* tests/fixtures/console-capture/baseline-v1.11.0.json. Fixture newlines are
* normalized because Windows checkouts may convert LF to CRLF.
*
* This test protects against future regressions, not against this PR's own changes.
* The fixture was captured from the post-change code with a 100-log input.
Expand Down Expand Up @@ -132,7 +133,7 @@ const FIXTURE_PATH = path.join(
);

describe('console_capture get response — v1.11.0 baseline regression', () => {
test('response shape (excluding bufferStats) matches baseline fixture byte-for-byte', () => {
test('response shape (excluding bufferStats) matches baseline fixture', () => {
const frozenLogs = buildFrozenLogs();
const response = buildGetResponse(frozenLogs);
const responseJson = JSON.stringify(response, null, 2);
Expand Down
Loading