Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions specs/jsonl-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@
"type": "string",
"enum": [
"auth_failed",
"provider_unavailable",
"sdk_error",
"subprocess_failure",
"max_turns",
Expand Down
2 changes: 1 addition & 1 deletion specs/reporters.md
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ New run logs are homogeneous: every line is a chunk result. Each line is self-co

**HunkFailure**: `{ type: "analysis" | "extraction", filename: string, lineRange: string, code: ErrorCode, message: string, preview?: string, attempts?: int }`

**ErrorCode**: one of `"auth_failed"`, `"sdk_error"`, `"subprocess_failure"`, `"max_turns"`, `"aborted"`, `"all_hunks_failed"`, `"skill_resolution_failed"`, `"extraction_invalid_json"`, `"extraction_unbalanced_json"`, `"extraction_no_findings_json"`, `"extraction_missing_findings_key"`, `"extraction_findings_not_array"`, `"extraction_llm_failed"`, `"extraction_llm_timeout"`, `"extraction_no_api_key"`, `"unknown"`. Stable public contract.
**ErrorCode**: one of `"auth_failed"`, `"provider_unavailable"`, `"sdk_error"`, `"subprocess_failure"`, `"max_turns"`, `"aborted"`, `"all_hunks_failed"`, `"skill_resolution_failed"`, `"extraction_invalid_json"`, `"extraction_unbalanced_json"`, `"extraction_no_findings_json"`, `"extraction_missing_findings_key"`, `"extraction_findings_not_array"`, `"extraction_llm_failed"`, `"extraction_llm_timeout"`, `"extraction_no_api_key"`, `"unknown"`. Stable public contract.

**BySeverity**: `{ high: int, medium: int, low: int }`. Legacy 5-level keys (`critical`, `info`) are still accepted on read for backward compatibility with older logs and normalized to `high`/`low`; new output is strictly 3-level.

Expand Down
13 changes: 12 additions & 1 deletion src/action/triggers/executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ import {
} from '../../output/github-checks.js';
import { logGroup, logGroupEnd } from '../workflow/base.js';
import { DEFAULT_FILE_CONCURRENCY } from '../../sdk/types.js';
import { SkillRunnerError } from '../../sdk/errors.js';
import { SkillRunnerError, classifyError } from '../../sdk/errors.js';
import type { Semaphore } from '../../utils/index.js';
import { Verbosity } from '../../cli/output/verbosity.js';
import type { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js';

/** Log-mode output for CI: no TTY, no color. */
const CI_OUTPUT_MODE: OutputMode = { isTTY: false, supportsColor: false, columns: 120 };
Expand Down Expand Up @@ -56,6 +57,10 @@ export interface TriggerExecutorDeps {
globalFailCheck?: boolean;
/** Global semaphore for limiting concurrent file analyses across triggers */
semaphore?: Semaphore;
/** Shared controller for stopping the whole action run */
abortController?: AbortController;
/** Shared circuit breaker for auth/provider failures */
circuitBreaker?: ProviderFailureCircuitBreaker;
}

/**
Expand Down Expand Up @@ -144,6 +149,8 @@ export async function executeTrigger(
pathToClaudeCodeExecutable: claudePath,
auxiliaryMaxRetries: trigger.auxiliaryMaxRetries,
verifyFindings: trigger.verifyFindings,
abortController: deps.abortController,
circuitBreaker: deps.circuitBreaker,
},
};

Expand Down Expand Up @@ -216,8 +223,12 @@ export async function executeTrigger(
};
} catch (error) {
if (error instanceof ActionFailedError) throw error;
const { code } = classifyError(error);
Sentry.captureException(error, {
tags: { 'trigger.name': trigger.name, 'skill.name': trigger.skill },
...(code === 'provider_unavailable' || code === 'all_hunks_failed'
? { fingerprint: ['warden', code] }
: {}),
});

// Mark skill check as failed
Expand Down
6 changes: 6 additions & 0 deletions src/action/workflow/pr-workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import type { TriggerResult } from '../triggers/executor.js';
import { postTriggerReview } from '../review/poster.js';
import { shouldResolveStaleComments } from '../review/coordination.js';
import type { RuntimeName } from '../../sdk/runtimes/index.js';
import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js';
import {
createCoreCheck,
updateCoreCheck,
Expand Down Expand Up @@ -318,6 +319,8 @@ async function executeAllTriggers(
// Global semaphore gates file-level work across all triggers.
// All triggers launch immediately; the semaphore limits concurrent file analyses.
const semaphore = new Semaphore(concurrency);
const abortController = new AbortController();
const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController });

return runPool(
matchedTriggers,
Expand All @@ -334,7 +337,10 @@ async function executeAllTriggers(
globalRequestChanges: inputs.requestChanges,
globalFailCheck: inputs.failCheck,
semaphore,
abortController,
circuitBreaker,
}),
{ shouldAbort: () => abortController.signal.aborted },
);
}

Expand Down
21 changes: 18 additions & 3 deletions src/cli/output/ink-runner.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { Verbosity } from './verbosity.js';
import { ICON_CHECK, ICON_SKIPPED, ICON_PENDING, ICON_ERROR, SPINNER_FRAMES } from './icons.js';
import figures from 'figures';
import type { SkillReport } from '../../types/index.js';
import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js';

interface SkillRunnerProps {
skills: SkillState[];
Expand Down Expand Up @@ -269,7 +270,14 @@ export async function runSkillTasksWithInk(
if (tasks.length === 0 || verbosity === Verbosity.Quiet) {
// No tasks or quiet mode - run without UI using global semaphore.
const semaphore = new Semaphore(concurrency);
const composedTasks = composeTasksWithFailFast(tasks, failFastController);
const circuitAbortController = new AbortController();
const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController: circuitAbortController });
const composedTasks = composeTasksWithFailFast(
tasks,
failFastController,
circuitBreaker,
circuitAbortController,
);
const callbacks: SkillProgressCallbacks = {
...noopCallbacks,
...(fireStreamHook || failFastController
Expand Down Expand Up @@ -456,8 +464,15 @@ export async function runSkillTasksWithInk(
// Global semaphore gates file-level work across all skills.
const semaphore = new Semaphore(concurrency);

// Compose per-task abort controllers: fire on either SIGINT or fail-fast
const composedTasks = composeTasksWithFailFast(tasks, failFastController);
// Compose per-task abort controllers: fire on SIGINT, fail-fast, or provider circuit breaker.
const circuitAbortController = new AbortController();
const circuitBreaker = new ProviderFailureCircuitBreaker({ abortController: circuitAbortController });
const composedTasks = composeTasksWithFailFast(
tasks,
failFastController,
circuitBreaker,
circuitAbortController,
);

// Launch all skills in parallel; the semaphore is the sole concurrency gate.
const results = await runComposedSkillTasks(composedTasks, callbacks, semaphore);
Expand Down
149 changes: 146 additions & 3 deletions src/cli/output/tasks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type { HunkWithContext } from '../../diff/index.js';
import type { SkillDefinition } from '../../config/schema.js';
import { Semaphore, runPool } from '../../utils/index.js';
import { SkillRunnerError, WardenAuthenticationError } from '../../sdk/errors.js';
import { ProviderFailureCircuitBreaker } from '../../sdk/circuit-breaker.js';
import * as sdkRunner from '../../sdk/runner.js';

function makeFinding(overrides: Partial<Finding> = {}): Finding {
Expand Down Expand Up @@ -800,7 +801,7 @@ describe('runSkillTask all-hunks-fail synthesis', () => {
vi.restoreAllMocks();
});

it('synthesizes a report with error.code=all_hunks_failed when every hunk fails', async () => {
it('synthesizes a report with error.code=auth_failed when every hunk fails with auth errors', async () => {
const fakeHunk = {
hunk: { newStart: 1, newCount: 10 },
} as unknown as HunkWithContext;
Expand Down Expand Up @@ -839,7 +840,7 @@ describe('runSkillTask all-hunks-fail synthesis', () => {
const result = await runSkillTask(options, 1, { ...noopCallbacks(), onSkillError });

expect(result.report).toBeDefined();
expect(result.report!.error?.code).toBe('all_hunks_failed');
expect(result.report!.error?.code).toBe('auth_failed');
expect(result.report!.findings).toEqual([]);
expect(result.report!.failedHunks).toBe(1);
expect(result.report!.hunkFailures).toEqual(hunkFailures);
Expand All @@ -848,14 +849,156 @@ describe('runSkillTask all-hunks-fail synthesis', () => {
// re-throw (action executor, Sentry) preserve the ErrorCode. A missing
// error here produces a plain Error downstream and loses classification.
expect(result.error).toBeInstanceOf(SkillRunnerError);
expect((result.error as SkillRunnerError).code).toBe('all_hunks_failed');
expect((result.error as SkillRunnerError).code).toBe('auth_failed');
// Per-file metadata must be present even on failure runs — `warden runs`
// and JSONL consumers count attempted files via report.files. Empty
// files would show totalFiles: 0 for an all-hunks-failed run.
expect(result.report!.files).toHaveLength(1);
expect(result.report!.files![0]!.filename).toBe('a.ts');
});

it('preserves auth_failed when all analysis failures are auth alongside extraction failures', async () => {
const fakeHunks = [
{ hunk: { newStart: 1, newCount: 10 } },
{ hunk: { newStart: 20, newCount: 5 } },
] as unknown as HunkWithContext[];
const hunkFailures: HunkFailure[] = [
{ type: 'analysis', filename: 'a.ts', lineRange: '1-10', code: 'auth_failed', message: 'bad key' },
{
type: 'extraction',
filename: 'a.ts',
lineRange: '20-24',
code: 'extraction_invalid_json',
message: 'invalid_json',
},
];

vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({
files: [{ filename: 'a.ts', hunks: fakeHunks }],
skippedFiles: [],
});
vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({
filename: 'a.ts',
findings: [],
usage: { inputTokens: 0, outputTokens: 0, costUSD: 0 },
failedHunks: 1,
failedExtractions: 1,
hunkFailures,
});

const options: SkillTaskOptions = {
name: 'mixed-fail-skill',
resolveSkill: async () =>
({ name: 'mixed-fail-skill', definition: '', files: [] } as unknown as SkillDefinition),
context: {
eventType: 'pull_request',
repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' },
repoPath: '/tmp',
pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] },
} as unknown as SkillTaskOptions['context'],
};

const result = await runSkillTask(options, 1, noopCallbacks());

expect(result.report!.error?.code).toBe('auth_failed');
expect(result.report!.failedHunks).toBe(1);
expect(result.report!.failedExtractions).toBe(1);
expect((result.error as SkillRunnerError).code).toBe('auth_failed');
});

it('synthesizes provider_unavailable when every hunk fails with provider errors', async () => {
const fakeHunk = {
hunk: { newStart: 1, newCount: 10 },
} as unknown as HunkWithContext;
const hunkFailures: HunkFailure[] = [
{
type: 'analysis',
filename: 'a.ts',
lineRange: '1-10',
code: 'provider_unavailable',
message: 'Claude Code process exited with code 1',
},
];

vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({
files: [{ filename: 'a.ts', hunks: [fakeHunk] }],
skippedFiles: [],
});
vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({
filename: 'a.ts',
findings: [],
usage: { inputTokens: 0, outputTokens: 0, costUSD: 0 },
failedHunks: 1,
failedExtractions: 0,
hunkFailures,
});

const options: SkillTaskOptions = {
name: 'provider-fail-skill',
resolveSkill: async () =>
({ name: 'provider-fail-skill', definition: '', files: [] } as unknown as SkillDefinition),
context: {
eventType: 'pull_request',
repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' },
repoPath: '/tmp',
pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] },
} as unknown as SkillTaskOptions['context'],
};

const result = await runSkillTask(options, 1, noopCallbacks());

expect(result.report!.error?.code).toBe('provider_unavailable');
expect(result.report!.error?.message).toContain('Provider unavailable');
expect((result.error as SkillRunnerError).code).toBe('provider_unavailable');
});

it('ignores unrelated circuit state when this skill completed without failures', async () => {
const fakeHunk = {
hunk: { newStart: 1, newCount: 10 },
} as unknown as HunkWithContext;
const circuitBreaker = new ProviderFailureCircuitBreaker({
maxConsecutiveProviderFailures: 1,
});
circuitBreaker.recordFailure('provider_unavailable', 'temporary outage');

vi.spyOn(sdkRunner, 'prepareFiles').mockReturnValue({
files: [{ filename: 'a.ts', hunks: [fakeHunk] }],
skippedFiles: [],
});
vi.spyOn(sdkRunner, 'analyzeFile').mockResolvedValue({
filename: 'a.ts',
findings: [],
usage: { inputTokens: 1, outputTokens: 1, costUSD: 0.001 },
failedHunks: 0,
failedExtractions: 0,
hunkFailures: [],
});

const options: SkillTaskOptions = {
name: 'clean-skill',
resolveSkill: async () =>
({ name: 'clean-skill', definition: '', files: [] } as unknown as SkillDefinition),
context: {
eventType: 'pull_request',
repository: { owner: 'o', name: 'n', fullName: 'o/n', defaultBranch: 'main' },
repoPath: '/tmp',
pullRequest: { number: 1, title: 't', body: '', headSha: 'abc', baseSha: 'def', files: [] },
} as unknown as SkillTaskOptions['context'],
runnerOptions: { circuitBreaker },
};

const onSkillError = vi.fn();
const result = await runSkillTask(options, 1, { ...noopCallbacks(), onSkillError });

expect(result.error).toBeUndefined();
expect(result.report).toBeDefined();
expect(result.report!.error).toBeUndefined();
expect(result.report!.findings).toEqual([]);
expect(result.report!.failedHunks).toBeUndefined();
expect(result.report!.failedExtractions).toBeUndefined();
expect(onSkillError).not.toHaveBeenCalled();
});

it('triggers all_hunks_failed when every hunk succeeded at SDK level but extraction failed for all', async () => {
// Regression test for the if/else mutual-exclusion change: each hunk
// contributes to either failedHunks OR failedExtractions, not both.
Expand Down
Loading
Loading