From e1f9ef760fdc2101a6de3c21348e96ce82f0f31c Mon Sep 17 00:00:00 2001 From: ruben-cytonic Date: Thu, 12 Mar 2026 16:23:07 +0000 Subject: [PATCH] feat: add agent reviewer (--review) and bump to v0.4.4 - Add LLM-powered diff review step that runs after lint/build/test pass but before commit, catching security issues, logic errors, and pattern violations that automated checks miss - New src/loop/reviewer.ts module using existing tryCallLLM infrastructure (Anthropic/OpenAI/OpenRouter) with structured JSON findings output - Errors block commit and feed back via lastValidationFeedback; warnings are logged but non-blocking; gracefully skips if no diff or no API key - Wire --review CLI flag on run command and export public API types - Close 14 delivered issues (#212, #224, #225, #226, #227, #228, #229, #231, #232, #233, #237, #239, #240, #241) - Bump version to 0.4.4 Co-authored-by: Amp Amp-Thread-ID: https://ampcode.com/threads/T-019ce2d4-0f5f-742e-9721-3181f57267df --- package.json | 2 +- src/cli.ts | 1 + src/commands/run.ts | 3 + src/index.ts | 2 + src/loop/executor.ts | 119 ++++++++++++++++++++--- src/loop/memory.ts | 18 ++-- src/loop/progress.ts | 10 +- src/loop/reviewer.ts | 221 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 349 insertions(+), 27 deletions(-) create mode 100644 src/loop/reviewer.ts diff --git a/package.json b/package.json index d46fe0ea..de2a0fb1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ralph-starter", - "version": "0.4.3", + "version": "0.4.4", "description": "Ralph Wiggum made easy. One command to run autonomous AI coding loops with auto-commit, PRs, and Docker sandbox.", "main": "dist/index.js", "bin": { diff --git a/src/cli.ts b/src/cli.ts index fa88af60..692d2cb6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -132,6 +132,7 @@ program '--no-visual-check', 'Disable visual comparison validation (auto-enabled when Figma screenshots exist)' ) + .option('--review', 'Run LLM-powered diff review before commit (catches security/logic issues)') // Swarm mode options .option('--swarm', 'Run with multiple agents in parallel (swarm mode)') .option( diff --git a/src/commands/run.ts b/src/commands/run.ts index 313b0750..d996b3b3 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -312,6 +312,8 @@ export interface RunCommandOptions { strategy?: 'race' | 'consensus' | 'pipeline'; // Amp options ampMode?: 'smart' | 'rush' | 'deep'; + // Agent reviewer + review?: boolean; } export async function runCommand( @@ -1434,6 +1436,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN visualValidation, figmaScreenshotPaths, ampMode: options.ampMode, + review: options.review, }; // Swarm mode: run with multiple agents in parallel diff --git a/src/index.ts b/src/index.ts index bfbcf09f..449ff5de 100644 --- a/src/index.ts +++ b/src/index.ts @@ -45,6 +45,8 @@ export { CostTracker, resolveModelPricing } from './loop/cost-tracker.js'; export type { IterationUpdate, LoopOptions, LoopResult } from './loop/executor.js'; export { runLoop } from './loop/executor.js'; export { appendProjectMemory, readProjectMemory } from './loop/memory.js'; +export type { ReviewFinding, ReviewResult, ReviewSeverity } from './loop/reviewer.js'; +export { runReview } from './loop/reviewer.js'; export type { SwarmAgentResult, SwarmConfig, SwarmResult, SwarmStrategy } from './loop/swarm.js'; export { runSwarm } from './loop/swarm.js'; export { detectValidationCommands, runAllValidations, runValidation } from './loop/validation.js'; diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 5c78a585..627b7397 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -38,6 +38,7 @@ import { estimateLoop, formatEstimateDetailed } from './estimator.js'; import { appendProjectMemory, formatMemoryPrompt, readProjectMemory } from './memory.js'; import { checkFileBasedCompletion, createProgressTracker, type ProgressEntry } from './progress.js'; import { RateLimiter } from './rate-limiter.js'; +import { formatReviewAsValidation, formatReviewFeedback, runReview } from './reviewer.js'; import { analyzeResponse, hasExitSignal } from './semantic-analyzer.js'; import { detectClaudeSkills, formatSkillsForPrompt } from './skills.js'; import { detectStepFromOutput } from './step-detector.js'; @@ -269,6 +270,12 @@ export type LoopOptions = { env?: Record; /** Amp agent mode: smart, rush, deep */ ampMode?: import('./agents.js').AmpMode; + /** Run LLM-powered diff review after validation passes (before commit) */ + review?: boolean; + /** Product name shown in logs/UI (default: 'Ralph-Starter'). Set to white-label when embedding. */ + productName?: string; + /** Dot-directory for memory/iteration-log/activity (default: '.ralph'). */ + dotDir?: string; }; export type LoopResult = { @@ -401,13 +408,14 @@ function appendIterationLog( iteration: number, summary: string, validationPassed: boolean, - hasChanges: boolean + hasChanges: boolean, + dotDir = '.ralph' ): void { try { - const ralphDir = join(cwd, '.ralph'); - if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + const stateDir = join(cwd, dotDir); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); - const logPath = join(ralphDir, 'iteration-log.md'); + const logPath = join(stateDir, 'iteration-log.md'); const entry = `## Iteration ${iteration} - Status: ${validationPassed ? 'validation passed' : 'validation failed'} - Changes: ${hasChanges ? 'yes' : 'no files changed'} @@ -423,9 +431,13 @@ function appendIterationLog( * Read the last N iteration summaries from .ralph/iteration-log.md. * Used by context-builder to give the agent memory of previous iterations. */ -export function readIterationLog(cwd: string, maxEntries = 3): string | undefined { +export function readIterationLog( + cwd: string, + maxEntries = 3, + dotDir = '.ralph' +): string | undefined { try { - const logPath = join(cwd, '.ralph', 'iteration-log.md'); + const logPath = join(cwd, dotDir, 'iteration-log.md'); if (!existsSync(logPath)) return undefined; const content = readFileSync(logPath, 'utf-8'); @@ -503,6 +515,9 @@ export async function runLoop(options: LoopOptions): Promise { isSpinning: false, } : ora(); + const productName = options.productName || 'Ralph-Starter'; + const dotDir = options.dotDir || '.ralph'; + let maxIterations = options.maxIterations || 50; const commits: string[] = []; const startTime = Date.now(); @@ -523,7 +538,7 @@ export async function runLoop(options: LoopOptions): Promise { // Initialize progress tracker const progressTracker = options.trackProgress - ? createProgressTracker(options.cwd, options.task) + ? createProgressTracker(options.cwd, options.task, dotDir) : null; // Initialize cost tracker @@ -560,10 +575,10 @@ export async function runLoop(options: LoopOptions): Promise { } // Inject project memory from previous runs (if available) - const projectMemory = readProjectMemory(options.cwd); + const projectMemory = readProjectMemory(options.cwd, dotDir); if (projectMemory) { - taskWithSkills = `${taskWithSkills}\n\n${formatMemoryPrompt(projectMemory)}`; - log(chalk.dim(' Project memory loaded from .ralph/memory.md')); + taskWithSkills = `${taskWithSkills}\n\n${formatMemoryPrompt(projectMemory, dotDir)}`; + log(chalk.dim(` Project memory loaded from ${dotDir}/memory.md`)); } // Build abbreviated spec summary for context builder (iterations 2+) @@ -585,7 +600,7 @@ export async function runLoop(options: LoopOptions): Promise { // Show startup summary box const startupLines: string[] = []; - startupLines.push(chalk.cyan.bold(' Ralph-Starter')); + startupLines.push(chalk.cyan.bold(` ${productName}`)); startupLines.push(` Agent: ${chalk.white(options.agent.name)}`); startupLines.push(` Max loops: ${chalk.white(String(maxIterations))}`); if (validationCommands.length > 0) { @@ -871,7 +886,7 @@ export async function runLoop(options: LoopOptions): Promise { // Build iteration-specific task with smart context windowing // Read iteration log for inter-iteration memory (iterations 2+) - const iterationLog = i > 1 ? readIterationLog(options.cwd) : undefined; + const iterationLog = i > 1 ? readIterationLog(options.cwd, 3, dotDir) : undefined; const builtContext = buildIterationContext({ fullTask: options.task, @@ -1496,6 +1511,82 @@ export async function runLoop(options: LoopOptions): Promise { } } + // --- Agent reviewer: LLM-powered diff review before commit --- + if (options.review && hasChanges && i > 1 && pastWarmup) { + spinner.start(chalk.yellow(`Loop ${i}: Running agent review...`)); + try { + const reviewResult = await runReview(options.cwd); + if (reviewResult && !reviewResult.passed) { + const reviewValidation = formatReviewAsValidation(reviewResult); + validationResults.push(reviewValidation); + const feedback = formatReviewFeedback(reviewResult); + spinner.fail( + chalk.red( + `Loop ${i}: Agent review found ${reviewResult.findings.filter((f) => f.severity === 'error').length} error(s)` + ) + ); + for (const f of reviewResult.findings) { + const icon = f.severity === 'error' ? '❌' : f.severity === 'warning' ? '⚠️' : 'ℹ️'; + const location = f.file ? ` (${f.file}${f.line ? `:${f.line}` : ''})` : ''; + log(chalk.dim(` ${icon}${location} ${f.message}`)); + } + + validationFailures++; + const reviewErrorMsg = reviewResult.findings + .map((f) => `[${f.severity}] ${f.file ?? ''} ${f.message}`) + .join('\n'); + const tripped = circuitBreaker.recordFailure(reviewErrorMsg); + if (tripped) { + if (progressTracker && progressEntry) { + progressEntry.status = 'failed'; + progressEntry.summary = `Circuit breaker tripped (agent-review)`; + progressEntry.validationResults = validationResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + finalIteration = i; + exitReason = 'circuit_breaker'; + break; + } + + lastValidationFeedback = feedback; + + if (progressTracker && progressEntry) { + progressEntry.status = 'validation_failed'; + progressEntry.summary = 'Agent review failed'; + progressEntry.validationResults = validationResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + + continue; + } + if (reviewResult) { + const warnFindings = reviewResult.findings.filter((f) => f.severity === 'warning'); + const infoFindings = reviewResult.findings.filter((f) => f.severity === 'info'); + const parts: string[] = []; + if (warnFindings.length > 0) parts.push(`${warnFindings.length} warning(s)`); + if (infoFindings.length > 0) parts.push(`${infoFindings.length} info`); + const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : ''; + spinner.succeed(chalk.green(`Loop ${i}: Agent review passed${suffix}`)); + for (const f of [...warnFindings, ...infoFindings]) { + const icon = f.severity === 'warning' ? '⚠️' : 'ℹ️'; + log(chalk.dim(` ${icon} ${f.message}`)); + } + circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; + } else { + spinner.info(chalk.dim(`Loop ${i}: Agent review skipped (no diff or no LLM key)`)); + } + } catch (err) { + spinner.warn( + chalk.yellow( + `Loop ${i}: Agent review skipped (${err instanceof Error ? err.message : 'unknown error'})` + ) + ); + } + } + // Auto-commit if enabled and there are changes let committed = false; let commitMsg = ''; @@ -1547,7 +1638,7 @@ export async function runLoop(options: LoopOptions): Promise { // Write iteration summary for inter-iteration memory const iterSummary = summarizeChanges(result.output); const iterValidationPassed = validationResults.every((r) => r.success); - appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges); + appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges, dotDir); if (status === 'done') { const completionReason = completionResult.reason || 'Task marked as complete by agent'; @@ -1686,7 +1777,7 @@ export async function runLoop(options: LoopOptions): Promise { if (costTracker) { memorySummary.push(`Cost: ${formatCost(costTracker.getStats().totalCost.totalCost)}`); } - appendProjectMemory(options.cwd, memorySummary.join('\n')); + appendProjectMemory(options.cwd, memorySummary.join('\n'), dotDir); return { success: exitReason === 'completed' || exitReason === 'file_signal', diff --git a/src/loop/memory.ts b/src/loop/memory.ts index b3ffd90a..4c705d3e 100644 --- a/src/loop/memory.ts +++ b/src/loop/memory.ts @@ -15,9 +15,9 @@ const MAX_MEMORY_BYTES = 8 * 1024; // 8KB max — keeps context window usage rea * Read the project memory file. * Returns undefined if no memory exists yet. */ -export function readProjectMemory(cwd: string): string | undefined { +export function readProjectMemory(cwd: string, dotDir = '.ralph'): string | undefined { try { - const memoryPath = join(cwd, '.ralph', MEMORY_FILE); + const memoryPath = join(cwd, dotDir, MEMORY_FILE); if (!existsSync(memoryPath)) return undefined; const content = readFileSync(memoryPath, 'utf-8').trim(); @@ -45,12 +45,12 @@ export function readProjectMemory(cwd: string): string | undefined { /** * Append an entry to the project memory file. */ -export function appendProjectMemory(cwd: string, entry: string): void { +export function appendProjectMemory(cwd: string, entry: string, dotDir = '.ralph'): void { try { - const ralphDir = join(cwd, '.ralph'); - if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + const stateDir = join(cwd, dotDir); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); - const memoryPath = join(ralphDir, MEMORY_FILE); + const memoryPath = join(stateDir, MEMORY_FILE); const timestamp = new Date().toISOString().split('T')[0]; const formatted = `## ${timestamp}\n${entry.trim()}\n\n`; @@ -63,13 +63,13 @@ export function appendProjectMemory(cwd: string, entry: string): void { /** * Format memory content as a prompt section for injection into agent context. */ -export function formatMemoryPrompt(memory: string): string { +export function formatMemoryPrompt(memory: string, dotDir = '.ralph'): string { return `## Project Memory (from previous runs) -The following notes were saved from previous ralph-starter runs on this project. +The following notes were saved from previous runs on this project. Use them to understand project conventions and avoid repeating mistakes. ${memory} -If you discover new project conventions or important patterns, append them to \`.ralph/memory.md\`. +If you discover new project conventions or important patterns, append them to \`${dotDir}/memory.md\`. `; } diff --git a/src/loop/progress.ts b/src/loop/progress.ts index e00e04a5..ce13e918 100644 --- a/src/loop/progress.ts +++ b/src/loop/progress.ts @@ -21,7 +21,7 @@ export interface ProgressTracker { clear(): Promise; } -const ACTIVITY_FILE = '.ralph/activity.md'; +const DEFAULT_ACTIVITY_DIR = '.ralph'; /** * Format a progress entry as markdown @@ -128,8 +128,12 @@ function getFileHeader(task: string): string { /** * Create a progress tracker for a directory */ -export function createProgressTracker(cwd: string, task: string): ProgressTracker { - const filePath = path.join(cwd, ACTIVITY_FILE); +export function createProgressTracker( + cwd: string, + task: string, + dotDir = DEFAULT_ACTIVITY_DIR +): ProgressTracker { + const filePath = path.join(cwd, dotDir, 'activity.md'); const dirPath = path.dirname(filePath); let initialized = false; diff --git a/src/loop/reviewer.ts b/src/loop/reviewer.ts new file mode 100644 index 00000000..3591654a --- /dev/null +++ b/src/loop/reviewer.ts @@ -0,0 +1,221 @@ +/** + * Agent Reviewer — LLM-powered diff review step for the executor loop. + * + * Slots into the validation pipeline after lint/build/test pass but before commit. + * Analyzes the current git diff and returns structured feedback that can be fed + * back into the next iteration via lastValidationFeedback. + */ + +import { execa } from 'execa'; +import { tryCallLLM } from '../llm/api.js'; +import type { ValidationResult } from './validation.js'; + +/** Maximum diff size in characters to send to the LLM (avoid context overflow) */ +const MAX_DIFF_CHARS = 30_000; + +/** Review severity levels */ +export type ReviewSeverity = 'error' | 'warning' | 'info'; + +export type ReviewFinding = { + severity: ReviewSeverity; + message: string; + file?: string; + line?: number; +}; + +export type ReviewResult = { + passed: boolean; + findings: ReviewFinding[]; + model?: string; + /** Raw LLM response for debugging */ + raw?: string; +}; + +/** + * Get all working-tree changes as a single coherent diff. + * Prefers `git diff HEAD` (covers staged + unstaged in one pass). + * Falls back to combining staged + unstaged for repos with no commits. + */ +async function getDiff(cwd: string): Promise { + // Register untracked files so git diff HEAD includes new files + await execa('git', ['add', '--intent-to-add', '--all'], { cwd, reject: false }); + + const head = await execa('git', ['diff', 'HEAD'], { cwd, reject: false }); + + // Clean up: remove intent-to-add entries so the index isn't permanently mutated + await execa('git', ['restore', '--staged', '.'], { cwd, reject: false }); + + if (head.stdout.trim()) return head.stdout; + + // Fallback for repos with no commits yet: combine staged + unstaged + const [staged, unstaged] = await Promise.all([ + execa('git', ['diff', '--cached'], { cwd, reject: false }), + execa('git', ['diff'], { cwd, reject: false }), + ]); + return [staged.stdout, unstaged.stdout].filter(Boolean).join('\n'); +} + +const REVIEW_SYSTEM_PROMPT = `You are an expert code reviewer embedded in an automated coding loop. Your job is to review a git diff and catch issues that lint, build, and test checks cannot detect. + +Focus on: +1. **Security**: Hardcoded secrets, SQL injection, XSS, insecure defaults, exposed API keys +2. **Logic errors**: Off-by-one, race conditions, null/undefined access, wrong operator +3. **Pattern violations**: Not following the codebase's existing patterns/conventions +4. **Missing error handling**: Unhandled promises, missing try/catch at boundaries +5. **Performance**: Obvious N+1 queries, missing indexes, unbounded loops/allocations + +Do NOT flag: +- Style issues (handled by lint) +- Type errors (handled by build/typecheck) +- Missing tests (handled by test runner) +- Minor naming preferences + +Respond with a JSON array of findings. Each finding has: +- "severity": "error" | "warning" | "info" +- "message": concise description of the issue +- "file": the affected file path (if identifiable from the diff) +- "line": the relevant line number in the file (if identifiable from the diff) + +If the diff looks clean, respond with an empty array: [] + +Respond ONLY with the JSON array, no markdown fences or explanation.`; + +/** + * Run the agent reviewer on the current diff. + * Returns null if no diff exists or no LLM provider is available. + */ +export async function runReview(cwd: string): Promise { + const diff = await getDiff(cwd); + if (!diff.trim()) return null; + + // Truncate large diffs at a line boundary to avoid malformed patch hunks + const truncatedDiff = (() => { + if (diff.length <= MAX_DIFF_CHARS) return diff; + const cutIndex = diff.lastIndexOf('\n', MAX_DIFF_CHARS); + const slice = cutIndex > 0 ? diff.slice(0, cutIndex) : diff.slice(0, MAX_DIFF_CHARS); + return `${slice}\n\n... (diff truncated at ${MAX_DIFF_CHARS} chars, ${diff.length} total)`; + })(); + + const response = await tryCallLLM({ + system: REVIEW_SYSTEM_PROMPT, + prompt: `Review this diff:\n\n\`\`\`diff\n${truncatedDiff}\n\`\`\``, + maxTokens: 2048, + }); + + if (!response) return null; + + // Parse JSON findings from the response — null means LLM returned unparseable output + const findings = parseFindings(response.content); + if (!findings) return null; + + const hasErrors = findings.some((f) => f.severity === 'error'); + + return { + passed: !hasErrors, + findings, + model: response.model, + raw: response.content, + }; +} + +/** + * Parse review findings from LLM response. + * Handles both raw JSON arrays and markdown-fenced JSON. + */ +function parseFindings(content: string): ReviewFinding[] | null { + // Strip markdown code fences if present + const cleaned = content + .replace(/^```(?:json)?\s*\n?/m, '') + .replace(/\n?```\s*$/m, '') + .trim(); + + try { + const parsed = JSON.parse(cleaned); + if (!Array.isArray(parsed)) return null; + + // Empty array = clean review + if (parsed.length === 0) return []; + + const valid = parsed + .filter( + (f: unknown): f is { severity: string; message: string; file?: string; line?: number } => + typeof f === 'object' && + f !== null && + 'severity' in f && + 'message' in f && + typeof (f as Record).message === 'string' + ) + .map((f) => ({ + severity: (['error', 'warning', 'info'].includes(f.severity) + ? f.severity + : 'warning') as ReviewSeverity, + message: f.message, + file: typeof f.file === 'string' ? f.file : undefined, + line: typeof f.line === 'number' ? f.line : undefined, + })); + + // Non-empty array but all items malformed = indeterminate + if (valid.length === 0) return null; + + return valid; + } catch { + // LLM returned non-JSON — caller should treat as indeterminate (skipped), not passed + return null; + } +} + +/** + * Format review findings as a ValidationResult for the executor loop. + * This allows the reviewer to slot into the existing validation pipeline. + */ +export function formatReviewAsValidation(result: ReviewResult): ValidationResult { + if (result.passed && result.findings.length === 0) { + return { + success: true, + command: 'agent-review', + output: 'No issues found', + }; + } + + const lines: string[] = []; + for (const finding of result.findings) { + const icon = finding.severity === 'error' ? '❌' : finding.severity === 'warning' ? '⚠️' : 'ℹ️'; + const location = finding.file + ? ` (${finding.file}${finding.line ? `:${finding.line}` : ''})` + : ''; + lines.push(`${icon} [${finding.severity.toUpperCase()}]${location}: ${finding.message}`); + } + + return { + success: result.passed, + command: 'agent-review', + output: lines.join('\n'), + ...(result.passed ? {} : { error: lines.join('\n') }), + }; +} + +/** + * Format review findings as feedback text for the lastValidationFeedback mechanism. + */ +export function formatReviewFeedback(result: ReviewResult): string { + if (result.findings.length === 0) return ''; + + const hasFailed = !result.passed; + const feedback = [hasFailed ? '## Agent Review Failed\n' : '## Agent Review Warnings\n']; + feedback.push( + `The automated code reviewer (${result.model || 'unknown'}) found issues in your changes:\n` + ); + + for (const finding of result.findings) { + const location = finding.file + ? ` in \`${finding.file}${finding.line ? `:${finding.line}` : ''}\`` + : ''; + feedback.push(`- **${finding.severity.toUpperCase()}**${location}: ${finding.message}`); + } + + if (hasFailed) { + feedback.push('\nPlease fix the errors above before continuing.'); + } + + return feedback.join('\n'); +}