diff --git a/specs/jsonl-schema.json b/specs/jsonl-schema.json index a52f538c..7da1e65e 100644 --- a/specs/jsonl-schema.json +++ b/specs/jsonl-schema.json @@ -322,6 +322,7 @@ "type": "string", "enum": [ "auth_failed", + "provider_unavailable", "sdk_error", "subprocess_failure", "max_turns", diff --git a/specs/reporters.md b/specs/reporters.md index 32d7431d..2e894b41 100644 --- a/specs/reporters.md +++ b/specs/reporters.md @@ -554,7 +554,7 @@ New run logs are homogeneous: every line is a chunk result. Each line is self-co **HunkFailure**: `{ type: "analysis" | "extraction", filename: string, lineRange: string, code: ErrorCode, message: string, preview?: string, attempts?: int }` -**ErrorCode**: one of `"auth_failed"`, `"sdk_error"`, `"subprocess_failure"`, `"max_turns"`, `"aborted"`, `"all_hunks_failed"`, `"skill_resolution_failed"`, `"extraction_invalid_json"`, `"extraction_unbalanced_json"`, `"extraction_no_findings_json"`, `"extraction_missing_findings_key"`, `"extraction_findings_not_array"`, `"extraction_llm_failed"`, `"extraction_llm_timeout"`, `"extraction_no_api_key"`, `"unknown"`. Stable public contract. +**ErrorCode**: one of `"auth_failed"`, `"provider_unavailable"`, `"sdk_error"`, `"subprocess_failure"`, `"max_turns"`, `"aborted"`, `"all_hunks_failed"`, `"skill_resolution_failed"`, `"extraction_invalid_json"`, `"extraction_unbalanced_json"`, `"extraction_no_findings_json"`, `"extraction_missing_findings_key"`, `"extraction_findings_not_array"`, `"extraction_llm_failed"`, `"extraction_llm_timeout"`, `"extraction_no_api_key"`, `"unknown"`. Stable public contract. **BySeverity**: `{ high: int, medium: int, low: int }`. Legacy 5-level keys (`critical`, `info`) are still accepted on read for backward compatibility with older logs and normalized to `high`/`low`; new output is strictly 3-level. 
diff --git a/src/action/triggers/executor.ts b/src/action/triggers/executor.ts index cae37014..31ad5d17 100644 --- a/src/action/triggers/executor.ts +++ b/src/action/triggers/executor.ts @@ -24,9 +24,10 @@ import { } from '../../output/github-checks.js'; import { logGroup, logGroupEnd } from '../workflow/base.js'; import { DEFAULT_FILE_CONCURRENCY } from '../../sdk/types.js'; -import { SkillRunnerError } from '../../sdk/errors.js'; +import { SkillRunnerError, classifyError } from '../../sdk/errors.js'; import type { Semaphore } from '../../utils/index.js'; import { Verbosity } from '../../cli/output/verbosity.js'; +import type { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js'; /** Log-mode output for CI: no TTY, no color. */ const CI_OUTPUT_MODE: OutputMode = { isTTY: false, supportsColor: false, columns: 120 }; @@ -56,6 +57,10 @@ export interface TriggerExecutorDeps { globalFailCheck?: boolean; /** Global semaphore for limiting concurrent file analyses across triggers */ semaphore?: Semaphore; + /** Shared controller for stopping the whole action run */ + abortController?: AbortController; + /** Shared circuit breaker for auth/provider failures */ + circuitBreaker?: ProviderFailureCircuitBreaker; } /** @@ -144,6 +149,8 @@ export async function executeTrigger( pathToClaudeCodeExecutable: claudePath, auxiliaryMaxRetries: trigger.auxiliaryMaxRetries, verifyFindings: trigger.verifyFindings, + abortController: deps.abortController, + circuitBreaker: deps.circuitBreaker, }, }; @@ -216,8 +223,12 @@ export async function executeTrigger( }; } catch (error) { if (error instanceof ActionFailedError) throw error; + const { code } = classifyError(error); Sentry.captureException(error, { tags: { 'trigger.name': trigger.name, 'skill.name': trigger.skill }, + ...(code === 'provider_unavailable' || code === 'all_hunks_failed' + ? 
{ fingerprint: ['warden', code] } + : {}), }); // Mark skill check as failed diff --git a/src/action/workflow/pr-workflow.ts b/src/action/workflow/pr-workflow.ts index 2e15d78f..49cbef77 100644 --- a/src/action/workflow/pr-workflow.ts +++ b/src/action/workflow/pr-workflow.ts @@ -39,6 +39,7 @@ import type { TriggerResult } from '../triggers/executor.js'; import { postTriggerReview } from '../review/poster.js'; import { shouldResolveStaleComments } from '../review/coordination.js'; import type { RuntimeName } from '../../sdk/runtimes/index.js'; +import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js'; import { createCoreCheck, updateCoreCheck, @@ -318,6 +319,8 @@ async function executeAllTriggers( // Global semaphore gates file-level work across all triggers. // All triggers launch immediately; the semaphore limits concurrent file analyses. const semaphore = new Semaphore(concurrency); + const abortController = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController }); return runPool( matchedTriggers, @@ -334,7 +337,10 @@ async function executeAllTriggers( globalRequestChanges: inputs.requestChanges, globalFailCheck: inputs.failCheck, semaphore, + abortController, + circuitBreaker, }), + { shouldAbort: () => abortController.signal.aborted }, ); } diff --git a/src/cli/output/ink-runner.tsx b/src/cli/output/ink-runner.tsx index a60f7172..5b2cb9af 100644 --- a/src/cli/output/ink-runner.tsx +++ b/src/cli/output/ink-runner.tsx @@ -31,6 +31,7 @@ import { Verbosity } from './verbosity.js'; import { ICON_CHECK, ICON_SKIPPED, ICON_PENDING, ICON_ERROR, SPINNER_FRAMES } from './icons.js'; import figures from 'figures'; import type { SkillReport } from '../../types/index.js'; +import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js'; interface SkillRunnerProps { skills: SkillState[]; @@ -269,7 +270,14 @@ export async function runSkillTasksWithInk( if (tasks.length === 0 || verbosity === 
Verbosity.Quiet) { // No tasks or quiet mode - run without UI using global semaphore. const semaphore = new Semaphore(concurrency); - const composedTasks = composeTasksWithFailFast(tasks, failFastController); + const circuitAbortController = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController: circuitAbortController }); + const composedTasks = composeTasksWithFailFast( + tasks, + failFastController, + circuitBreaker, + circuitAbortController, + ); const callbacks: SkillProgressCallbacks = { ...noopCallbacks, ...(fireStreamHook || failFastController @@ -456,8 +464,15 @@ export async function runSkillTasksWithInk( // Global semaphore gates file-level work across all skills. const semaphore = new Semaphore(concurrency); - // Compose per-task abort controllers: fire on either SIGINT or fail-fast - const composedTasks = composeTasksWithFailFast(tasks, failFastController); + // Compose per-task abort controllers: fire on SIGINT, fail-fast, or provider circuit breaker. + const circuitAbortController = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController: circuitAbortController }); + const composedTasks = composeTasksWithFailFast( + tasks, + failFastController, + circuitBreaker, + circuitAbortController, + ); // Launch all skills in parallel; the semaphore is the sole concurrency gate. 
const results = await runComposedSkillTasks(composedTasks, callbacks, semaphore); diff --git a/src/cli/output/tasks.test.ts b/src/cli/output/tasks.test.ts index 270ccb3e..b5b54424 100644 --- a/src/cli/output/tasks.test.ts +++ b/src/cli/output/tasks.test.ts @@ -9,6 +9,7 @@ import type { HunkWithContext } from '../../diff/index.js'; import type { SkillDefinition } from '../../config/schema.js'; import { Semaphore, runPool } from '../../utils/index.js'; import { SkillRunnerError, WardenAuthenticationError } from '../../sdk/errors.js'; +import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js'; import * as sdkRunner from '../../sdk/runner.js'; function makeFinding(overrides: Partial = {}): Finding { @@ -800,7 +801,7 @@ describe('runSkillTask all-hunks-fail synthesis', () => { vi.restoreAllMocks(); }); - it('synthesizes a report with error.code=all_hunks_failed when every hunk fails', async () => { + it('synthesizes a report with error.code=auth_failed when every hunk fails with auth errors', async () => { const fakeHunk = { hunk: { newStart: 1, newCount: 10 }, } as unknown as HunkWithContext; @@ -839,7 +840,7 @@ describe('runSkillTask all-hunks-fail synthesis', () => { const result = await runSkillTask(options, 1, { ...noopCallbacks(), onSkillError }); expect(result.report).toBeDefined(); - expect(result.report!.error?.code).toBe('all_hunks_failed'); + expect(result.report!.error?.code).toBe('auth_failed'); expect(result.report!.findings).toEqual([]); expect(result.report!.failedHunks).toBe(1); expect(result.report!.hunkFailures).toEqual(hunkFailures); @@ -848,7 +849,7 @@ describe('runSkillTask all-hunks-fail synthesis', () => { // re-throw (action executor, Sentry) preserve the ErrorCode. A missing // error here produces a plain Error downstream and loses classification. 
expect(result.error).toBeInstanceOf(SkillRunnerError); - expect((result.error as SkillRunnerError).code).toBe('all_hunks_failed'); + expect((result.error as SkillRunnerError).code).toBe('auth_failed'); // Per-file metadata must be present even on failure runs — `warden runs` // and JSONL consumers count attempted files via report.files. Empty // files would show totalFiles: 0 for an all-hunks-failed run. @@ -856,6 +857,148 @@ describe('runSkillTask all-hunks-fail synthesis', () => { expect(result.report!.files![0]!.filename).toBe('a.ts'); }); + it('preserves auth_failed when all analysis failures are auth alongside extraction failures', async () => { + const fakeHunks = [ + { hunk: { newStart: 1, newCount: 10 } }, + { hunk: { newStart: 20, newCount: 5 } }, + ] as unknown as HunkWithContext[]; + const hunkFailures: HunkFailure[] = [ + { type: 'analysis', filename: 'a.ts', lineRange: '1-10', code: 'auth_failed', message: 'bad key' }, + { + type: 'extraction', + filename: 'a.ts', + lineRange: '20-24', + code: 'extraction_invalid_json', + message: 'invalid_json', + }, + ]; + + vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({ + files: [{ filename: 'a.ts', hunks: fakeHunks }], + skippedFiles: [], + }); + vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({ + filename: 'a.ts', + findings: [], + usage: { inputTokens: 0, outputTokens: 0, costUSD: 0 }, + failedHunks: 1, + failedExtractions: 1, + hunkFailures, + }); + + const options: SkillTaskOptions = { + name: 'mixed-fail-skill', + resolveSkill: async () => + ({ name: 'mixed-fail-skill', definition: '', files: [] } as unknown as SkillDefinition), + context: { + eventType: 'pull_request', + repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' }, + repoPath: '/tmp', + pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] }, + } as unknown as SkillTaskOptions['context'], + }; + + const result = await runSkillTask(options, 1, noopCallbacks()); + + 
expect(result.report!.error?.code).toBe('auth_failed'); + expect(result.report!.failedHunks).toBe(1); + expect(result.report!.failedExtractions).toBe(1); + expect((result.error as SkillRunnerError).code).toBe('auth_failed'); + }); + + it('synthesizes provider_unavailable when every hunk fails with provider errors', async () => { + const fakeHunk = { + hunk: { newStart: 1, newCount: 10 }, + } as unknown as HunkWithContext; + const hunkFailures: HunkFailure[] = [ + { + type: 'analysis', + filename: 'a.ts', + lineRange: '1-10', + code: 'provider_unavailable', + message: 'Claude Code process exited with code 1', + }, + ]; + + vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({ + files: [{ filename: 'a.ts', hunks: [fakeHunk] }], + skippedFiles: [], + }); + vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({ + filename: 'a.ts', + findings: [], + usage: { inputTokens: 0, outputTokens: 0, costUSD: 0 }, + failedHunks: 1, + failedExtractions: 0, + hunkFailures, + }); + + const options: SkillTaskOptions = { + name: 'provider-fail-skill', + resolveSkill: async () => + ({ name: 'provider-fail-skill', definition: '', files: [] } as unknown as SkillDefinition), + context: { + eventType: 'pull_request', + repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' }, + repoPath: '/tmp', + pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] }, + } as unknown as SkillTaskOptions['context'], + }; + + const result = await runSkillTask(options, 1, noopCallbacks()); + + expect(result.report!.error?.code).toBe('provider_unavailable'); + expect(result.report!.error?.message).toContain('Provider unavailable'); + expect((result.error as SkillRunnerError).code).toBe('provider_unavailable'); + }); + + it('ignores unrelated circuit state when this skill completed without failures', async () => { + const fakeHunk = { + hunk: { newStart: 1, newCount: 10 }, + } as unknown as HunkWithContext; + const circuitBreaker = new 
ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 1, + }); + circuitBreaker.recordFailure('provider_unavailable', 'temporary outage'); + + vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({ + files: [{ filename: 'a.ts', hunks: [fakeHunk] }], + skippedFiles: [], + }); + vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({ + filename: 'a.ts', + findings: [], + usage: { inputTokens: 1, outputTokens: 1, costUSD: 0.001 }, + failedHunks: 0, + failedExtractions: 0, + hunkFailures: [], + }); + + const options: SkillTaskOptions = { + name: 'clean-skill', + resolveSkill: async () => + ({ name: 'clean-skill', definition: '', files: [] } as unknown as SkillDefinition), + context: { + eventType: 'pull_request', + repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' }, + repoPath: '/tmp', + pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] }, + } as unknown as SkillTaskOptions['context'], + runnerOptions: { circuitBreaker }, + }; + + const onSkillError = vi.fn(); + const result = await runSkillTask(options, 1, { ...noopCallbacks(), onSkillError }); + + expect(result.error).toBeUndefined(); + expect(result.report).toBeDefined(); + expect(result.report!.error).toBeUndefined(); + expect(result.report!.findings).toEqual([]); + expect(result.report!.failedHunks).toBeUndefined(); + expect(result.report!.failedExtractions).toBeUndefined(); + expect(onSkillError).not.toHaveBeenCalled(); + }); + it('triggers all_hunks_failed when every hunk succeeded at SDK level but extraction failed for all', async () => { // Regression test for the if/else mutual-exclusion change: each hunk // contributes to either failedHunks OR failedExtractions, not both. 
diff --git a/src/cli/output/tasks.ts b/src/cli/output/tasks.ts index c657f51e..86d1dc1a 100644 --- a/src/cli/output/tasks.ts +++ b/src/cli/output/tasks.ts @@ -5,7 +5,7 @@ * Reporter spec: specs/reporters.md */ -import type { SkillReport, SeverityThreshold, ConfidenceThreshold, Finding, UsageStats, EventContext, HunkFailure, AuxiliaryUsageMap } from '../../types/index.js'; +import type { SkillReport, SeverityThreshold, ConfidenceThreshold, Finding, UsageStats, EventContext, HunkFailure, AuxiliaryUsageMap, ErrorCode } from '../../types/index.js'; import type { SkillDefinition } from '../../config/schema.js'; import { Sentry, emitSkillMetrics, logger } from '../../sentry.js'; import { SkillRunnerError, WardenAuthenticationError, classifyError } from '../../sdk/errors.js'; @@ -24,6 +24,7 @@ import { type ChunkAnalysisResult, type FindingProcessingEvent, } from '../../sdk/runner.js'; +import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js'; import chalk from 'chalk'; import figures from 'figures'; import { Verbosity } from './verbosity.js'; @@ -46,6 +47,47 @@ interface FileProcessResult { auxiliaryUsage?: AuxiliaryUsageEntry[]; } +function allAnalysisFailuresHaveCode( + hunkFailures: HunkFailure[], + code: ErrorCode, +): boolean { + const analysisFailures = hunkFailures.filter((failure) => failure.type === 'analysis'); + return ( + analysisFailures.length > 0 + && analysisFailures.every((failure) => failure.code === code) + ); +} + +function summarizeRunFailure(args: { + totalHunks: number; + hunkFailures: HunkFailure[]; + circuitReason?: { code: ErrorCode; message: string }; +}): { code: ErrorCode; message: string } { + const { totalHunks, hunkFailures, circuitReason } = args; + if (circuitReason) { + return circuitReason; + } + if (allAnalysisFailuresHaveCode(hunkFailures, 'auth_failed')) { + return { + code: 'auth_failed', + message: 'Authentication failed. 
Warden stopped early.', + }; + } + if (allAnalysisFailuresHaveCode(hunkFailures, 'provider_unavailable')) { + return { + code: 'provider_unavailable', + message: `Provider unavailable: all ${totalHunks} chunk${totalHunks === 1 ? '' : 's'} failed to analyze. Warden stopped early.`, + }; + } + return { + code: 'all_hunks_failed', + message: + `All ${totalHunks} chunk${totalHunks === 1 ? '' : 's'} failed to analyze. ` + + `This usually indicates an authentication problem. ` + + `Verify WARDEN_ANTHROPIC_API_KEY is set correctly, or run 'claude login' if using Claude Code subscription.`, + }; +} + /** * Write a log-mode message to stderr with timestamp prefix. * Used for non-TTY / plain output. @@ -458,20 +500,28 @@ export async function runSkillTask( // failed — a silent zero-findings run otherwise. const totalAttemptFailures = totalFailedHunks + totalFailedExtractions; + const circuitReason = runnerOptions.circuitBreaker?.reason; if ( totalHunks > 0 - && totalAttemptFailures === totalHunks && allFindings.length === 0 - && !(runnerOptions.abortController?.signal.aborted ?? false) + && totalAttemptFailures > 0 + && ( + circuitReason + || ( + totalAttemptFailures === totalHunks + && !(runnerOptions.abortController?.signal.aborted ?? false) + ) + ) ) { const auxUsage = aggregateAuxiliaryUsage(allAuxEntries); - const errorMessage = - `All ${totalHunks} chunk${totalHunks === 1 ? '' : 's'} failed to analyze. ` + - `This usually indicates an authentication problem. 
` + - `Verify WARDEN_ANTHROPIC_API_KEY is set correctly, or run 'claude login' if using Claude Code subscription.`; + const error = summarizeRunFailure({ + totalHunks, + hunkFailures: allHunkFailures, + circuitReason, + }); const errorReport: SkillReport = { skill: skill.name, - summary: `${skill.name}: all chunks failed`, + summary: `${skill.name}: failed (${error.code})`, findings: [], usage: aggregateUsage(allUsage), durationMs: duration, @@ -492,15 +542,15 @@ export async function runSkillTask( failedHunks: totalFailedHunks, hunkFailures: allHunkFailures, error: { - code: 'all_hunks_failed', - message: errorMessage, + code: error.code, + message: error.message, timestamp: new Date().toISOString(), }, }; if (totalFailedExtractions > 0) errorReport.failedExtractions = totalFailedExtractions; if (skippedFiles.length > 0) errorReport.skippedFiles = skippedFiles; if (auxUsage) errorReport.auxiliaryUsage = auxUsage; - callbacks.onSkillError(name, errorMessage); + callbacks.onSkillError(name, error.message); // Mirror the success path: emit a final completion event with the // (errored) report so terminal renderers print the per-skill // summary line. Without this, console mode shows the error string @@ -515,7 +565,7 @@ export async function runSkillTask( callbacks.onSkillComplete(name, errorReport); // Carry a typed error alongside the report so consumers that re-throw // (action executor, Sentry.captureException) preserve the ErrorCode. - const runnerError = new SkillRunnerError(errorMessage, { code: 'all_hunks_failed' }); + const runnerError = new SkillRunnerError(error.message, { code: error.code }); return { name, report: errorReport, error: runnerError, failOn, minConfidence }; } @@ -822,13 +872,10 @@ export function createDefaultCallbacks( }; } -/** - * Create an AbortController that fires when either of two controllers abort. 
- */ -function composeAbortControllers(a?: AbortController, b?: AbortController): AbortController { +function composeAbortControllers(...controllers: (AbortController | undefined)[]): AbortController { const composed = new AbortController(); - for (const ctrl of [a, b]) { + for (const ctrl of controllers) { if (ctrl?.signal.aborted) { composed.abort(); return composed; @@ -840,20 +887,26 @@ function composeAbortControllers(a?: AbortController, b?: AbortController): Abor } /** - * Overlay a fail-fast abort controller onto each task's runner options. - * Returns the original tasks unchanged when no controller is provided. + * Share abort/circuit state across task runner options. */ export function composeTasksWithFailFast( tasks: SkillTaskOptions[], - failFastController?: AbortController + failFastController?: AbortController, + circuitBreaker?: ProviderFailureCircuitBreaker, + circuitAbortController?: AbortController, ): SkillTaskOptions[] { - if (!failFastController) return tasks; + if (!failFastController && !circuitBreaker && !circuitAbortController) return tasks; return tasks.map((task) => ({ ...task, runnerOptions: { ...task.runnerOptions, - abortController: composeAbortControllers(task.runnerOptions?.abortController, failFastController), + abortController: composeAbortControllers( + task.runnerOptions?.abortController, + failFastController, + circuitAbortController, + ), + circuitBreaker: task.runnerOptions?.circuitBreaker ?? 
circuitBreaker, }, })); } @@ -920,12 +973,21 @@ export async function runSkillTasks( console.error(chalk.bold('SKILLS')); } + const circuitAbortController = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController: circuitAbortController }); + const composedTasks = composeTasksWithFailFast( + tasks, + failFastController, + circuitBreaker, + circuitAbortController, + ); + // Listen for abort signal to show interrupt message (non-TTY only; Ink handles TTY) - const abortSignal = tasks[0]?.runnerOptions?.abortController?.signal; + const abortSignal = composedTasks[0]?.runnerOptions?.abortController?.signal; if (abortSignal && !abortSignal.aborted && !mode.isTTY && verbosity !== Verbosity.Quiet) { abortSignal.addEventListener('abort', () => { // Only show interrupt message for user SIGINT, not fail-fast - if (!failFastController?.signal.aborted) { + if (!failFastController?.signal.aborted && !circuitAbortController.signal.aborted) { logPlain('Interrupted, finishing up... (press Ctrl+C again to force exit)'); } }, { once: true }); @@ -938,9 +1000,6 @@ export async function runSkillTasks( }, { once: true }); } - // Compose per-task abort controllers: fire on either SIGINT or fail-fast - const composedTasks = composeTasksWithFailFast(tasks, failFastController); - // Launch all skills in parallel; the semaphore is the sole concurrency gate. 
return runComposedSkillTasks(composedTasks, wrappedCallbacks, semaphore); } diff --git a/src/sdk/analyze.test.ts b/src/sdk/analyze.test.ts index 1b68365e..421f48eb 100644 --- a/src/sdk/analyze.test.ts +++ b/src/sdk/analyze.test.ts @@ -1,10 +1,12 @@ import { describe, it, expect, vi, afterEach } from 'vitest'; +import { APIError } from '@anthropic-ai/sdk'; import type { SkillDefinition } from '../config/schema.js'; import type { HunkWithContext } from '../diff/index.js'; -import type { Finding } from '../types/index.js'; -import { analyzeFile, filterOutOfRangeFindings } from './analyze.js'; +import type { EventContext, Finding, UsageStats } from '../types/index.js'; +import { analyzeFile, filterOutOfRangeFindings, runSkill } from './analyze.js'; import type { PreparedFile } from './types.js'; import { getRuntime, type Runtime } from './runtimes/index.js'; +import { ProviderFailureCircuitBreaker } from './circuit-breaker.js'; vi.mock('./runtimes/index.js', () => ({ getRuntime: vi.fn(), @@ -31,31 +33,109 @@ function makeGeneralFinding(id = 'general'): Finding { }; } +function makeUsage(): UsageStats { + return { inputTokens: 10, outputTokens: 5, costUSD: 0.001 }; +} + function makeAbortError(): Error { const error = new Error('The operation was aborted'); error.name = 'AbortError'; return error; } -function makePreparedFile(): PreparedFile { - const hunk: HunkWithContext = { +function makePreparedFile(hunkCount = 1): PreparedFile { + const hunks: HunkWithContext[] = Array.from({ length: hunkCount }, (_, index) => { + const line = index + 1; + return { + filename: 'src/example.ts', + hunk: { + oldStart: line, + oldCount: 1, + newStart: line, + newCount: 1, + content: `@@ -${line},1 +${line},1 @@\n-old\n+new`, + lines: ['-old', '+new'], + }, + contextBefore: [], + contextAfter: [], + contextStartLine: line, + language: 'typescript', + }; + }); + return { filename: 'src/example.ts', - hunk: { - oldStart: 1, - oldCount: 1, - newStart: 1, - newCount: 1, - content: '@@ -1,1 
+1,1 @@\n-old\n+new', - lines: ['-old', '+new'], + hunks, + }; +} + +function makeContextWithThreeHunks(): EventContext { + return { + eventType: 'pull_request', + action: 'opened', + repository: { owner: 'o', name: 'r', fullName: 'o/r', defaultBranch: 'main' }, + repoPath: '/tmp/repo', + pullRequest: { + number: 1, + title: 'Test PR', + body: '', + author: 'test', + baseBranch: 'main', + headBranch: 'feature', + headSha: 'head', + baseSha: 'base', + files: [{ + filename: 'src/example.ts', + status: 'modified', + additions: 3, + deletions: 3, + patch: [ + '@@ -10,1 +10,1 @@', + '-old10', + '+new10', + '@@ -100,1 +100,1 @@', + '-old100', + '+new100', + '@@ -200,1 +200,1 @@', + '-old200', + '+new200', + ].join('\n'), + chunks: 3, + }], }, - contextBefore: [], - contextAfter: [], - contextStartLine: 1, - language: 'typescript', }; +} + +function makeContextWithTwoHunks(): EventContext { return { - filename: 'src/example.ts', - hunks: [hunk], + eventType: 'pull_request', + action: 'opened', + repository: { owner: 'o', name: 'r', fullName: 'o/r', defaultBranch: 'main' }, + repoPath: '/tmp/repo', + pullRequest: { + number: 1, + title: 'Test PR', + body: '', + author: 'test', + baseBranch: 'main', + headBranch: 'feature', + headSha: 'head', + baseSha: 'base', + files: [{ + filename: 'src/example.ts', + status: 'modified', + additions: 2, + deletions: 2, + patch: [ + '@@ -10,1 +10,1 @@', + '-old10', + '+new10', + '@@ -100,1 +100,1 @@', + '-old100', + '+new100', + ].join('\n'), + chunks: 2, + }], + }, }; } @@ -176,4 +256,322 @@ describe('analyzeFile', () => { expect(consoleSpy).not.toHaveBeenCalledWith(expect.stringContaining('All retry attempts failed')); consoleSpy.mockRestore(); }); + + it('opens the shared circuit after consecutive provider failures', async () => { + const controller = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 2, + abortController: controller, + }); + const runSkill = 
vi.fn(async () => { + throw new Error('Claude Code process exited with code 1'); + }); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined); + const onChunkComplete = vi.fn(); + const skill: SkillDefinition = { + name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }; + + const result = await analyzeFile( + skill, + makePreparedFile(3), + '/tmp/repo', + { + abortController: controller, + circuitBreaker, + retry: { + maxRetries: 0, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + }, + }, + { onChunkComplete }, + ); + + expect(runSkill).toHaveBeenCalledTimes(2); + expect(controller.signal.aborted).toBe(true); + expect(circuitBreaker.reason?.code).toBe('provider_unavailable'); + expect(result.failedHunks).toBe(2); + expect(result.hunkFailures.map((failure) => failure.code)).toEqual([ + 'provider_unavailable', + 'provider_unavailable', + ]); + expect(result.hunkFailures[1]!.message).toContain('Provider unavailable after 2 consecutive failures'); + expect(onChunkComplete).toHaveBeenCalledTimes(2); + consoleSpy.mockRestore(); + }); + + it('counts provider failures once per hunk after retries are exhausted', async () => { + const controller = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 2, + abortController: controller, + }); + const runSkill = vi.fn(async () => { + throw new APIError( + 529, + { error: { type: 'overloaded_error', message: 'overloaded' } }, + 'overloaded', + undefined + ); + }); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined); + const skill: SkillDefinition = { + 
name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }; + + const result = await analyzeFile( + skill, + makePreparedFile(), + '/tmp/repo', + { + abortController: controller, + circuitBreaker, + retry: { + maxRetries: 2, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + }, + }, + ); + + expect(runSkill).toHaveBeenCalledTimes(3); + expect(controller.signal.aborted).toBe(false); + expect(circuitBreaker.reason).toBeUndefined(); + expect(result.failedHunks).toBe(1); + expect(result.hunkFailures[0]?.code).toBe('provider_unavailable'); + consoleSpy.mockRestore(); + }); + + it('preserves non-circuit failure codes when another hunk opens the circuit', async () => { + const controller = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 1, + abortController: controller, + }); + const runSkill = vi.fn(async () => { + circuitBreaker.recordFailure('provider_unavailable', 'provider outage'); + return { + result: { + status: 'turn_limit', + text: '', + errors: ['max turns reached'], + usage: makeUsage(), + }, + }; + }); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined); + const skill: SkillDefinition = { + name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }; + + const result = await analyzeFile( + skill, + makePreparedFile(), + '/tmp/repo', + { + abortController: controller, + circuitBreaker, + retry: { + maxRetries: 0, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + }, + }, + ); + + expect(circuitBreaker.reason?.code).toBe('provider_unavailable'); + expect(result.failedHunks).toBe(1); + expect(result.hunkFailures[0]?.code).toBe('max_turns'); + expect(result.hunkFailures[0]?.message).toContain('max turns 
reached'); + consoleSpy.mockRestore(); + }); +}); + +describe('runSkill', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('preserves partial findings when the shared circuit opens mid-run', async () => { + const controller = new AbortController(); + const circuitBreaker = new ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 2, + abortController: controller, + }); + const runSkillMock = vi.fn() + .mockResolvedValueOnce({ + result: { + status: 'success', + text: JSON.stringify({ + findings: [makeFinding(10, 'first-finding')], + }), + errors: [], + usage: makeUsage(), + }, + }) + .mockRejectedValue(new Error('Claude Code process exited with code 1')); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill: runSkillMock, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined); + + const report = await runSkill( + { + name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }, + makeContextWithThreeHunks(), + { + abortController: controller, + circuitBreaker, + retry: { + maxRetries: 0, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + }, + verifyFindings: false, + }, + ); + + expect(runSkillMock).toHaveBeenCalledTimes(3); + expect(circuitBreaker.reason?.code).toBe('provider_unavailable'); + expect(report.findings).toEqual([ + expect.objectContaining({ + title: 'Finding at line 10', + location: { path: 'src/example.ts', startLine: 10 }, + }), + ]); + expect(report.failedHunks).toBe(2); + expect(report.hunkFailures?.map((failure) => failure.code)).toEqual([ + 'provider_unavailable', + 'provider_unavailable', + ]); + expect(report.error).toBeUndefined(); + consoleSpy.mockRestore(); + }); + + it('ignores unrelated circuit state when this skill completed without failures', async () => { + const circuitBreaker = new ProviderFailureCircuitBreaker({ + 
maxConsecutiveProviderFailures: 1, + }); + const successResult = { + result: { + status: 'success', + text: JSON.stringify({ findings: [] }), + errors: [], + usage: makeUsage(), + }, + }; + const runSkillMock = vi.fn() + .mockResolvedValueOnce(successResult) + .mockImplementationOnce(async () => { + circuitBreaker.recordFailure('provider_unavailable', 'temporary outage'); + return successResult; + }); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill: runSkillMock, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + + const report = await runSkill( + { + name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }, + makeContextWithTwoHunks(), + { + circuitBreaker, + verifyFindings: false, + }, + ); + + expect(runSkillMock).toHaveBeenCalledTimes(2); + expect(report.findings).toEqual([]); + expect(report.failedHunks).toBeUndefined(); + expect(report.failedExtractions).toBeUndefined(); + expect(report.error).toBeUndefined(); + }); + + it('classifies mixed provider and extraction failures as provider unavailable', async () => { + const runSkillMock = vi.fn() + .mockResolvedValueOnce({ + result: { + status: 'provider_error', + text: '', + errors: ['provider overloaded'], + usage: makeUsage(), + }, + }) + .mockResolvedValueOnce({ + result: { + status: 'success', + text: 'not json', + errors: [], + usage: makeUsage(), + }, + }); + vi.mocked(getRuntime).mockReturnValue({ + name: 'claude', + runSkill: runSkillMock, + runAuxiliary: vi.fn(), + runSynthesis: vi.fn(), + } as unknown as Runtime); + const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => undefined); + + await expect(runSkill( + { + name: 'security-review', + description: 'Security review.', + prompt: 'Return findings as JSON.', + }, + makeContextWithTwoHunks(), + { + retry: { + maxRetries: 0, + initialDelayMs: 1, + backoffMultiplier: 1, + maxDelayMs: 1, + }, + verifyFindings: false, + }, + 
)).rejects.toMatchObject({ code: 'provider_unavailable' }); + expect(runSkillMock).toHaveBeenCalledTimes(2); + consoleSpy.mockRestore(); + }); }); diff --git a/src/sdk/analyze.ts b/src/sdk/analyze.ts index 84779617..1f86a1bf 100644 --- a/src/sdk/analyze.ts +++ b/src/sdk/analyze.ts @@ -1,8 +1,9 @@ import type { SkillDefinition } from '../config/schema.js'; -import type { Finding, RetryConfig } from '../types/index.js'; +import type { ErrorCode, Finding, RetryConfig } from '../types/index.js'; import { getHunkLineRange, type HunkWithContext } from '../diff/index.js'; import { Sentry, emitExtractionMetrics, emitRetryMetric } from '../sentry.js'; import { SkillRunnerError, WardenAuthenticationError, isRetryableError, isAuthenticationError, isAuthenticationErrorMessage, isSubprocessError, classifyError, mapExtractionErrorCode, sanitizeErrorMessage } from './errors.js'; +import type { CircuitBreakerReason } from './circuit-breaker.js'; import { DEFAULT_RETRY_CONFIG, calculateRetryDelay, sleep } from './retry.js'; import { aggregateUsage, emptyUsage, estimateTokens, aggregateAuxiliaryUsage } from './usage.js'; import { buildHunkSystemPrompt, buildHunkUserPrompt, type PRPromptContext } from './prompt.js'; @@ -57,6 +58,36 @@ function isAbortRequested(error: unknown, abortController?: AbortController): bo return (abortController?.signal.aborted ?? 
false) || classifyError(error).code === 'aborted'; } +function isCircuitBreakerCode(code: ErrorCode | undefined): code is CircuitBreakerReason['code'] { + return code === 'auth_failed' || code === 'provider_unavailable'; +} + +function hunkFailureFromCircuit( + reason: CircuitBreakerReason, + usage: UsageStats[], + attempts: number, +): HunkAnalysisResult { + return { + findings: [], + usage: aggregateUsage(usage), + failed: true, + extractionFailed: false, + failureCode: reason.code, + failureMessage: reason.message, + attempts, + }; +} + +function recordCircuitFailure( + options: SkillRunnerOptions, + code: ErrorCode, + message: string, +): CircuitBreakerReason | undefined { + if (!isCircuitBreakerCode(code)) return undefined; + options.circuitBreaker?.recordFailure(code, message); + return options.circuitBreaker?.reason; +} + /** * Parse findings from a hunk analysis result. * Uses a two-tier extraction strategy: @@ -183,6 +214,11 @@ async function analyzeHunk( const accumulatedUsage: UsageStats[] = []; for (let attempt = 0; attempt <= retryConfig.maxRetries; attempt++) { + const circuitReason = options.circuitBreaker?.reason; + if (circuitReason) { + return hunkFailureFromCircuit(circuitReason, accumulatedUsage, attempt); + } + // Check for abort before each attempt if (abortController?.signal.aborted) { callbacks?.onHunkFailed?.(callbacks.lineRange, 'Analysis aborted'); @@ -257,18 +293,30 @@ async function analyzeHunk( const errorSummary = errorMessages.length > 0 ? sanitizeErrorMessage(errorMessages.join('; ')) : `Runtime error: ${resultMessage.status}`; - notifyHunkFailed(callbacks, callbacks?.lineRange ?? lineRange, `Runtime execution failed: ${errorSummary}`); + const failureCode = + resultMessage.status === 'turn_limit' + ? 'max_turns' + : resultMessage.status === 'provider_error' + ? 
'provider_unavailable' + : 'sdk_error'; + const failureMessage = `Runtime execution failed: ${errorSummary}`; + const openReason = recordCircuitFailure(options, failureCode, failureMessage); + notifyHunkFailed(callbacks, callbacks?.lineRange ?? lineRange, failureMessage); + if (openReason) { + return hunkFailureFromCircuit(openReason, accumulatedUsage, attempt + 1); + } return { findings: [], usage: aggregateUsage(accumulatedUsage), failed: true, extractionFailed: false, - failureCode: resultMessage.status === 'turn_limit' ? 'max_turns' : 'sdk_error', - failureMessage: `Runtime execution failed: ${errorSummary}`, + failureCode, + failureMessage, attempts: attempt + 1, }; } + options.circuitBreaker?.recordSuccess(); const parseResult = await parseHunkOutput(resultMessage, hunkCtx.filename, options); // Filter findings outside hunk line range (defense-in-depth) @@ -339,6 +387,8 @@ async function analyzeHunk( // Re-throw authentication errors (they shouldn't be retried) if (error instanceof WardenAuthenticationError) { + const message = sanitizeErrorMessage(error.message); + options.circuitBreaker?.recordFailure('auth_failed', message); throw error; } @@ -346,6 +396,7 @@ async function analyzeHunk( // can't communicate — surface as an auth error with actionable guidance if (isSubprocessError(error)) { const errorMessage = error instanceof Error ? error.message : String(error); + options.circuitBreaker?.recordFailure('auth_failed', sanitizeErrorMessage(errorMessage)); throw new WardenAuthenticationError( `Claude Code subprocess failed (${errorMessage}).\n` + `This usually means the claude CLI cannot run in this environment.`, @@ -355,11 +406,14 @@ async function analyzeHunk( // Authentication errors should surface immediately with helpful guidance if (isAuthenticationError(error)) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + options.circuitBreaker?.recordFailure('auth_failed', sanitizeErrorMessage(errorMessage)); throw new WardenAuthenticationError(undefined, { cause: error }); } // Don't retry if not a retryable error or we've exhausted retries - if (!isRetryableError(error) || attempt >= retryConfig.maxRetries) { + const shouldRetry = isRetryableError(error) && attempt < retryConfig.maxRetries; + if (!shouldRetry) { break; } @@ -426,6 +480,10 @@ async function analyzeHunk( const { code: retryCode, message } = classifyError(lastError); const retryMsg = sanitizeErrorMessage(message); + const openReason = recordCircuitFailure(options, retryCode, retryMsg); + if (openReason) { + return hunkFailureFromCircuit(openReason, accumulatedUsage, retryConfig.maxRetries + 1); + } return { findings: [], usage: aggregateUsage(accumulatedUsage), @@ -781,7 +839,21 @@ export async function runSkill( // at most one (analyzeFile makes them mutually exclusive), and an // extraction-only failure scenario would otherwise slip through silently. const totalAttemptFailures = totalFailedHunks + totalFailedExtractions; + const circuitReason = options.circuitBreaker?.reason; + if (circuitReason && totalAttemptFailures > 0 && allFindings.length === 0) { + throw new SkillRunnerError(circuitReason.message, { code: circuitReason.code }); + } if (totalAttemptFailures > 0 && totalAttemptFailures === totalHunks && allFindings.length === 0) { + const analysisFailures = allHunkFailures.filter((failure) => failure.type === 'analysis'); + if ( + analysisFailures.length > 0 + && analysisFailures.every((failure) => failure.code === 'provider_unavailable') + ) { + throw new SkillRunnerError( + `Provider unavailable: all ${totalHunks} chunk${totalHunks === 1 ? '' : 's'} failed to analyze. Warden stopped early.`, + { code: 'provider_unavailable' }, + ); + } throw new SkillRunnerError( `All ${totalHunks} chunk${totalHunks === 1 ? '' : 's'} failed to analyze. 
`This usually indicates an authentication problem. ` + diff --git a/src/sdk/circuit-breaker.test.ts b/src/sdk/circuit-breaker.test.ts new file mode 100644 index 00000000..878daa31 --- /dev/null +++ b/src/sdk/circuit-breaker.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from 'vitest'; +import { ProviderFailureCircuitBreaker } from './circuit-breaker.js'; + +describe('ProviderFailureCircuitBreaker', () => { + it('opens immediately on auth failures', () => { + const controller = new AbortController(); + const breaker = new ProviderFailureCircuitBreaker({ abortController: controller }); + + breaker.recordFailure('auth_failed', 'bad key'); + + expect(breaker.reason).toEqual({ code: 'auth_failed', message: 'bad key' }); + expect(controller.signal.aborted).toBe(true); + }); + + it('opens after consecutive provider failures and resets on success', () => { + const controller = new AbortController(); + const breaker = new ProviderFailureCircuitBreaker({ + maxConsecutiveProviderFailures: 2, + abortController: controller, + }); + + breaker.recordFailure('provider_unavailable', 'first outage'); + breaker.recordSuccess(); + breaker.recordFailure('provider_unavailable', 'second outage'); + + expect(breaker.reason).toBeUndefined(); + expect(controller.signal.aborted).toBe(false); + + breaker.recordFailure('provider_unavailable', 'third outage'); + + expect(breaker.reason?.code).toBe('provider_unavailable'); + expect(breaker.reason?.message).toContain('Provider unavailable after 2 consecutive failures'); + expect(controller.signal.aborted).toBe(true); + }); +}); diff --git a/src/sdk/circuit-breaker.ts b/src/sdk/circuit-breaker.ts new file mode 100644 index 00000000..ad15e2cb --- /dev/null +++ b/src/sdk/circuit-breaker.ts @@ -0,0 +1,73 @@ +import type { ErrorCode } from '../types/index.js'; +import { sanitizeErrorMessage } from './errors.js'; + +const DEFAULT_MAX_CONSECUTIVE_PROVIDER_FAILURES = 5; + +type CircuitBreakerCode = Extract<ErrorCode, 'auth_failed' | 'provider_unavailable'>; + +export interface
CircuitBreakerReason { + code: CircuitBreakerCode; + message: string; +} + +interface ProviderFailureCircuitBreakerOptions { + maxConsecutiveProviderFailures?: number; + abortController?: AbortController; +} + +function providerUnavailableMessage(count: number, lastMessage: string): string { + const detail = sanitizeErrorMessage(lastMessage).trim(); + const suffix = detail ? ` Last error: ${detail}` : ''; + return `Provider unavailable after ${count} consecutive failures. Warden stopped early.${suffix}`; +} + +/** + * Tracks unrecoverable provider failures across a Warden run. + */ +export class ProviderFailureCircuitBreaker { + private consecutiveProviderFailures = 0; + private openReason?: CircuitBreakerReason; + private readonly maxConsecutiveProviderFailures: number; + private readonly abortController?: AbortController; + + constructor(options: ProviderFailureCircuitBreakerOptions = {}) { + this.maxConsecutiveProviderFailures = + options.maxConsecutiveProviderFailures ?? DEFAULT_MAX_CONSECUTIVE_PROVIDER_FAILURES; + this.abortController = options.abortController; + } + + get reason(): CircuitBreakerReason | undefined { + return this.openReason; + } + + recordSuccess(): void { + if (this.openReason) return; + this.consecutiveProviderFailures = 0; + } + + recordFailure(code: ErrorCode, message: string): void { + if (this.openReason) return; + + if (code === 'auth_failed') { + this.open({ code, message }); + return; + } + + if (code !== 'provider_unavailable') return; + + this.consecutiveProviderFailures++; + if (this.consecutiveProviderFailures >= this.maxConsecutiveProviderFailures) { + this.open({ + code, + message: providerUnavailableMessage(this.consecutiveProviderFailures, message), + }); + } + } + + private open(reason: CircuitBreakerReason): void { + this.openReason = reason; + if (!this.abortController?.signal.aborted) { + this.abortController?.abort(); + } + } +} diff --git a/src/sdk/errors.test.ts b/src/sdk/errors.test.ts index d616ba36..2389439c 100644 
--- a/src/sdk/errors.test.ts +++ b/src/sdk/errors.test.ts @@ -100,6 +100,20 @@ describe('classifyError', () => { expect(classifyError(err).code).toBe('auth_failed'); }); + it('tags retryable API errors as provider_unavailable', () => { + const err = new APIError( + 529, + { error: { type: 'overloaded_error', message: 'overloaded' } }, + 'overloaded', + undefined + ); + expect(classifyError(err).code).toBe('provider_unavailable'); + }); + + it('tags Claude Code process exits as provider_unavailable', () => { + expect(classifyError(new Error('Claude Code process exited with code 1')).code).toBe('provider_unavailable'); + }); + it('tags AbortError as aborted', () => { const err = new Error('The operation was aborted'); err.name = 'AbortError'; diff --git a/src/sdk/errors.ts b/src/sdk/errors.ts index 80d6d251..0e76b24d 100644 --- a/src/sdk/errors.ts +++ b/src/sdk/errors.ts @@ -114,6 +114,20 @@ export function isRetryableError(error: unknown): boolean { return false; } +/** + * Check if an error indicates an unavailable provider/runtime. + * These failures can recover later, but repeated failures should stop the run. + */ +function isProviderUnavailableError(error: unknown): boolean { + if (isRetryableError(error)) return true; + + const message = error instanceof Error ? error.message : String(error); + return ( + /Claude Code process exited with code \d+/i.test(message) || + /Claude Code stderr:[\s\S]*\b(overloaded|rate limit|timed? out|timeout|ECONNRESET|ECONNREFUSED|ENOTFOUND|ETIMEDOUT)\b/i.test(message) + ); +} + /** * Check if an error is an authentication failure. * These require user action (login or API key) and should not be retried. 
@@ -144,6 +158,9 @@ export function classifyError(error: unknown): { code: ErrorCode; message: strin if (isAuthenticationError(error)) { return { code: 'auth_failed', message }; } + if (isProviderUnavailableError(error)) { + return { code: 'provider_unavailable', message }; + } if (error instanceof Error && error.name === 'AbortError') { return { code: 'aborted', message }; } diff --git a/src/sdk/types.ts b/src/sdk/types.ts index 4562a219..cff7f6cc 100644 --- a/src/sdk/types.ts +++ b/src/sdk/types.ts @@ -2,6 +2,7 @@ import type { Finding, UsageStats, SkippedFile, RetryConfig, ErrorCode, HunkFail import type { HunkWithContext } from '../diff/index.js'; import type { ChunkingConfig } from '../config/schema.js'; import type { RuntimeName } from './runtimes/index.js'; +import type { ProviderFailureCircuitBreaker } from './circuit-breaker.js'; /** A single auxiliary usage entry, keyed by agent name (e.g. 'extraction', 'dedup'). */ export interface AuxiliaryUsageEntry { @@ -113,6 +114,8 @@ export interface SkillRunnerOptions { callbacks?: SkillRunnerCallbacks; /** Abort controller for cancellation on SIGINT */ abortController?: AbortController; + /** Shared circuit breaker for run-scoped auth/provider failures */ + circuitBreaker?: ProviderFailureCircuitBreaker; /** Path to Claude Code CLI executable. Required in CI environments when using the Claude runtime. */ pathToClaudeCodeExecutable?: string; /** Retry configuration for transient API failures */ diff --git a/src/types/index.ts b/src/types/index.ts index b08a0e26..743024d3 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -200,6 +200,7 @@ export type FileReport = z.infer; // Stable codes for run failures. Public contract: add new codes, do not rename. export const ErrorCodeSchema = z.enum([ 'auth_failed', + 'provider_unavailable', 'sdk_error', 'subprocess_failure', 'max_turns',