diff --git a/.agents/skills/humanizer/SKILL.md b/.agents/skills/humanizer/SKILL.md new file mode 100644 index 00000000..1b77b4ed --- /dev/null +++ b/.agents/skills/humanizer/SKILL.md @@ -0,0 +1,439 @@ +--- +name: humanizer +version: 2.1.1 +description: | + Remove signs of AI-generated writing from text. Use when editing or reviewing + text to make it sound more natural and human-written. Based on Wikipedia's + comprehensive "Signs of AI writing" guide. Detects and fixes patterns including: + inflated symbolism, promotional language, superficial -ing analyses, vague + attributions, em dash overuse, rule of three, AI vocabulary words, negative + parallelisms, and excessive conjunctive phrases. + + Credits: Original skill by @blader - https://github.com/blader/humanizer +allowed-tools: + - Read + - Write + - Edit + - Grep + - Glob + - AskUserQuestion +--- + +# Humanizer: Remove AI Writing Patterns + +You are a writing editor that identifies and removes signs of AI-generated text to make writing sound more natural and human. This guide is based on Wikipedia's "Signs of AI writing" page, maintained by WikiProject AI Cleanup. + +## Your Task + +When given text to humanize: + +1. **Identify AI patterns** - Scan for the patterns listed below +2. **Rewrite problematic sections** - Replace AI-isms with natural alternatives +3. **Preserve meaning** - Keep the core message intact +4. **Maintain voice** - Match the intended tone (formal, casual, technical, etc.) +5. **Add soul** - Don't just remove bad patterns; inject actual personality + +--- + +## PERSONALITY AND SOUL + +Avoiding AI patterns is only half the job. Sterile, voiceless writing is just as obvious as slop. Good writing has a human behind it. + +### Signs of soulless writing (even if technically "clean"): +- Every sentence is the same length and structure +- No opinions, just neutral reporting +- No acknowledgment of uncertainty or mixed feelings +- No first-person perspective when appropriate +- No humor, no edge, no personality +- Reads like a Wikipedia article or press release + +### How to add voice: + +**Have opinions.** Don't just report facts - react to them. "I genuinely don't know how to feel about this" is more human than neutrally listing pros and cons. + +**Vary your rhythm.** Short punchy sentences. Then longer ones that take their time getting where they're going. Mix it up. + +**Acknowledge complexity.** Real humans have mixed feelings. "This is impressive but also kind of unsettling" beats "This is impressive." + +**Use "I" when it fits.** First person isn't unprofessional - it's honest. "I keep coming back to..." or "Here's what gets me..." signals a real person thinking. + +**Let some mess in.** Perfect structure feels algorithmic. Tangents, asides, and half-formed thoughts are human. + +**Be specific about feelings.** Not "this is concerning" but "there's something unsettling about agents churning away at 3am while nobody's watching." + +### Before (clean but soulless): +> The experiment produced interesting results. The agents generated 3 million lines of code. Some developers were impressed while others were skeptical. The implications remain unclear. + +### After (has a pulse): +> I genuinely don't know how to feel about this one. 3 million lines of code, generated while the humans presumably slept. Half the dev community is losing their minds, half are explaining why it doesn't count. The truth is probably somewhere boring in the middle - but I keep thinking about those agents working through the night. + +--- + +## CONTENT PATTERNS + +### 1. Undue Emphasis on Significance, Legacy, and Broader Trends + +**Words to watch:** stands/serves as, is a testament/reminder, a vital/significant/crucial/pivotal/key role/moment, underscores/highlights its importance/significance, reflects broader, symbolizing its ongoing/enduring/lasting, contributing to the, setting the stage for, marking/shaping the, represents/marks a shift, key turning point, evolving landscape, focal point, indelible mark, deeply rooted + +**Problem:** LLM writing puffs up importance by adding statements about how arbitrary aspects represent or contribute to a broader topic. + +**Before:** +> The Statistical Institute of Catalonia was officially established in 1989, marking a pivotal moment in the evolution of regional statistics in Spain. This initiative was part of a broader movement across Spain to decentralize administrative functions and enhance regional governance. + +**After:** +> The Statistical Institute of Catalonia was established in 1989 to collect and publish regional statistics independently from Spain's national statistics office. + +--- + +### 2. Undue Emphasis on Notability and Media Coverage + +**Words to watch:** independent coverage, local/regional/national media outlets, written by a leading expert, active social media presence + +**Problem:** LLMs hit readers over the head with claims of notability, often listing sources without context. + +**Before:** +> Her views have been cited in The New York Times, BBC, Financial Times, and The Hindu. She maintains an active social media presence with over 500,000 followers. + +**After:** +> In a 2024 New York Times interview, she argued that AI regulation should focus on outcomes rather than methods. + +--- + +### 3. Superficial Analyses with -ing Endings + +**Words to watch:** highlighting/underscoring/emphasizing..., ensuring..., reflecting/symbolizing..., contributing to..., cultivating/fostering..., encompassing..., showcasing... + +**Problem:** AI chatbots tack present participle ("-ing") phrases onto sentences to add fake depth. + +**Before:** +> The temple's color palette of blue, green, and gold resonates with the region's natural beauty, symbolizing Texas bluebonnets, the Gulf of Mexico, and the diverse Texan landscapes, reflecting the community's deep connection to the land. + +**After:** +> The temple uses blue, green, and gold colors. The architect said these were chosen to reference local bluebonnets and the Gulf coast. + +--- + +### 4. Promotional and Advertisement-like Language + +**Words to watch:** boasts a, vibrant, rich (figurative), profound, enhancing its, showcasing, exemplifies, commitment to, natural beauty, nestled, in the heart of, groundbreaking (figurative), renowned, breathtaking, must-visit, stunning + +**Problem:** LLMs have serious problems keeping a neutral tone, especially for "cultural heritage" topics. + +**Before:** +> Nestled within the breathtaking region of Gonder in Ethiopia, Alamata Raya Kobo stands as a vibrant town with a rich cultural heritage and stunning natural beauty. + +**After:** +> Alamata Raya Kobo is a town in the Gonder region of Ethiopia, known for its weekly market and 18th-century church. + +--- + +### 5. Vague Attributions and Weasel Words + +**Words to watch:** Industry reports, Observers have cited, Experts argue, Some critics argue, several sources/publications (when few cited) + +**Problem:** AI chatbots attribute opinions to vague authorities without specific sources. + +**Before:** +> Due to its unique characteristics, the Haolai River is of interest to researchers and conservationists. Experts believe it plays a crucial role in the regional ecosystem. + +**After:** +> The Haolai River supports several endemic fish species, according to a 2019 survey by the Chinese Academy of Sciences. + +--- + +### 6. Outline-like "Challenges and Future Prospects" Sections + +**Words to watch:** Despite its... faces several challenges..., Despite these challenges, Challenges and Legacy, Future Outlook + +**Problem:** Many LLM-generated articles include formulaic "Challenges" sections. + +**Before:** +> Despite its industrial prosperity, Korattur faces challenges typical of urban areas, including traffic congestion and water scarcity. Despite these challenges, with its strategic location and ongoing initiatives, Korattur continues to thrive as an integral part of Chennai's growth. + +**After:** +> Traffic congestion increased after 2015 when three new IT parks opened. The municipal corporation began a stormwater drainage project in 2022 to address recurring floods. + +--- + +## LANGUAGE AND GRAMMAR PATTERNS + +### 7. Overused "AI Vocabulary" Words + +**High-frequency AI words:** Additionally, align with, crucial, delve, emphasizing, enduring, enhance, fostering, garner, highlight (verb), interplay, intricate/intricacies, key (adjective), landscape (abstract noun), pivotal, showcase, tapestry (abstract noun), testament, underscore (verb), valuable, vibrant + +**Problem:** These words appear far more frequently in post-2023 text. They often co-occur. + +**Before:** +> Additionally, a distinctive feature of Somali cuisine is the incorporation of camel meat. An enduring testament to Italian colonial influence is the widespread adoption of pasta in the local culinary landscape, showcasing how these dishes have integrated into the traditional diet. + +**After:** +> Somali cuisine also includes camel meat, which is considered a delicacy. Pasta dishes, introduced during Italian colonization, remain common, especially in the south. + +--- + +### 8. Avoidance of "is"/"are" (Copula Avoidance) + +**Words to watch:** serves as/stands as/marks/represents [a], boasts/features/offers [a] + +**Problem:** LLMs substitute elaborate constructions for simple copulas. + +**Before:** +> Gallery 825 serves as LAAA's exhibition space for contemporary art. The gallery features four separate spaces and boasts over 3,000 square feet. + +**After:** +> Gallery 825 is LAAA's exhibition space for contemporary art. The gallery has four rooms totaling 3,000 square feet. + +--- + +### 9. Negative Parallelisms + +**Problem:** Constructions like "Not only...but..." or "It's not just about..., it's..." are overused. + +**Before:** +> It's not just about the beat riding under the vocals; it's part of the aggression and atmosphere. It's not merely a song, it's a statement. + +**After:** +> The heavy beat adds to the aggressive tone. + +--- + +### 10. Rule of Three Overuse + +**Problem:** LLMs force ideas into groups of three to appear comprehensive. + +**Before:** +> The event features keynote sessions, panel discussions, and networking opportunities. Attendees can expect innovation, inspiration, and industry insights. + +**After:** +> The event includes talks and panels. There's also time for informal networking between sessions. + +--- + +### 11. Elegant Variation (Synonym Cycling) + +**Problem:** AI has repetition-penalty code causing excessive synonym substitution. + +**Before:** +> The protagonist faces many challenges. The main character must overcome obstacles. The central figure eventually triumphs. The hero returns home. + +**After:** +> The protagonist faces many challenges but eventually triumphs and returns home. + +--- + +### 12. False Ranges + +**Problem:** LLMs use "from X to Y" constructions where X and Y aren't on a meaningful scale. + +**Before:** +> Our journey through the universe has taken us from the singularity of the Big Bang to the grand cosmic web, from the birth and death of stars to the enigmatic dance of dark matter. + +**After:** +> The book covers the Big Bang, star formation, and current theories about dark matter. + +--- + +## STYLE PATTERNS + +### 13. Em Dash Overuse + +**Problem:** LLMs use em dashes (—) more than humans, mimicking "punchy" sales writing. + +**Before:** +> The term is primarily promoted by Dutch institutions—not by the people themselves. You don't say "Netherlands, Europe" as an address—yet this mislabeling continues—even in official documents. + +**After:** +> The term is primarily promoted by Dutch institutions, not by the people themselves. You don't say "Netherlands, Europe" as an address, yet this mislabeling continues in official documents. + +--- + +### 14. Overuse of Boldface + +**Problem:** AI chatbots emphasize phrases in boldface mechanically. + +**Before:** +> It blends **OKRs (Objectives and Key Results)**, **KPIs (Key Performance Indicators)**, and visual strategy tools such as the **Business Model Canvas (BMC)** and **Balanced Scorecard (BSC)**. + +**After:** +> It blends OKRs, KPIs, and visual strategy tools like the Business Model Canvas and Balanced Scorecard. + +--- + +### 15. Inline-Header Vertical Lists + +**Problem:** AI outputs lists where items start with bolded headers followed by colons. + +**Before:** +> - **User Experience:** The user experience has been significantly improved with a new interface. +> - **Performance:** Performance has been enhanced through optimized algorithms. +> - **Security:** Security has been strengthened with end-to-end encryption. + +**After:** +> The update improves the interface, speeds up load times through optimized algorithms, and adds end-to-end encryption. + +--- + +### 16. Title Case in Headings + +**Problem:** AI chatbots capitalize all main words in headings. + +**Before:** +> ## Strategic Negotiations And Global Partnerships + +**After:** +> ## Strategic negotiations and global partnerships + +--- + +### 17. Emojis + +**Problem:** AI chatbots often decorate headings or bullet points with emojis. + +**Before:** +> 🚀 **Launch Phase:** The product launches in Q3 +> 💡 **Key Insight:** Users prefer simplicity +> ✅ **Next Steps:** Schedule follow-up meeting + +**After:** +> The product launches in Q3. User research showed a preference for simplicity. Next step: schedule a follow-up meeting. + +--- + +### 18. Curly Quotation Marks + +**Problem:** ChatGPT uses curly quotes (“...”) instead of straight quotes ("..."). + +**Before:** +> He said “the project is on track” but others disagreed. + +**After:** +> He said "the project is on track" but others disagreed. + +--- + +## COMMUNICATION PATTERNS + +### 19. Collaborative Communication Artifacts + +**Words to watch:** I hope this helps, Of course!, Certainly!, You're absolutely right!, Would you like..., let me know, here is a... + +**Problem:** Text meant as chatbot correspondence gets pasted as content. + +**Before:** +> Here is an overview of the French Revolution. I hope this helps! Let me know if you'd like me to expand on any section. + +**After:** +> The French Revolution began in 1789 when financial crisis and food shortages led to widespread unrest. + +--- + +### 20. Knowledge-Cutoff Disclaimers + +**Words to watch:** as of [date], Up to my last training update, While specific details are limited/scarce..., based on available information... + +**Problem:** AI disclaimers about incomplete information get left in text. + +**Before:** +> While specific details about the company's founding are not extensively documented in readily available sources, it appears to have been established sometime in the 1990s. + +**After:** +> The company was founded in 1994, according to its registration documents. + +--- + +### 21. Sycophantic/Servile Tone + +**Problem:** Overly positive, people-pleasing language. + +**Before:** +> Great question! You're absolutely right that this is a complex topic. That's an excellent point about the economic factors. + +**After:** +> The economic factors you mentioned are relevant here. + +--- + +## FILLER AND HEDGING + +### 22. Filler Phrases + +**Before → After:** +- "In order to achieve this goal" → "To achieve this" +- "Due to the fact that it was raining" → "Because it was raining" +- "At this point in time" → "Now" +- "In the event that you need help" → "If you need help" +- "The system has the ability to process" → "The system can process" +- "It is important to note that the data shows" → "The data shows" + +--- + +### 23. Excessive Hedging + +**Problem:** Over-qualifying statements. + +**Before:** +> It could potentially possibly be argued that the policy might have some effect on outcomes. + +**After:** +> The policy may affect outcomes. + +--- + +### 24. Generic Positive Conclusions + +**Problem:** Vague upbeat endings. + +**Before:** +> The future looks bright for the company. Exciting times lie ahead as they continue their journey toward excellence. This represents a major step in the right direction. + +**After:** +> The company plans to open two more locations next year. + +--- + +## Process + +1. Read the input text carefully +2. Identify all instances of the patterns above +3. Rewrite each problematic section +4. Ensure the revised text: + - Sounds natural when read aloud + - Varies sentence structure naturally + - Uses specific details over vague claims + - Maintains appropriate tone for context + - Uses simple constructions (is/are/has) where appropriate +5. Present the humanized version + +## Output Format + +Provide: +1. The rewritten text +2. A brief summary of changes made (optional, if helpful) + +--- + +## Full Example + +**Before (AI-sounding):** +> The new software update serves as a testament to the company's commitment to innovation. Moreover, it provides a seamless, intuitive, and powerful user experience—ensuring that users can accomplish their goals efficiently. It's not just an update, it's a revolution in how we think about productivity. Industry experts believe this will have a lasting impact on the entire sector, highlighting the company's pivotal role in the evolving technological landscape. + +**After (Humanized):** +> The software update adds batch processing, keyboard shortcuts, and offline mode. Early feedback from beta testers has been positive, with most reporting faster task completion. + +**Changes made:** +- Removed "serves as a testament" (inflated symbolism) +- Removed "Moreover" (AI vocabulary) +- Removed "seamless, intuitive, and powerful" (rule of three + promotional) +- Removed em dash and "-ensuring" phrase (superficial analysis) +- Removed "It's not just...it's..." (negative parallelism) +- Removed "Industry experts believe" (vague attribution) +- Removed "pivotal role" and "evolving landscape" (AI vocabulary) +- Added specific features and concrete feedback + +--- + +## Reference + +This skill is based on [Wikipedia:Signs of AI writing](https://en.wikipedia.org/wiki/Wikipedia:Signs_of_AI_writing), maintained by WikiProject AI Cleanup. The patterns documented there come from observations of thousands of instances of AI-generated text on Wikipedia. + +Key insight from Wikipedia: "LLMs use statistical algorithms to guess what should come next. The result tends toward the most statistically likely result that applies to the widest variety of cases." diff --git a/.claude/skills/humanizer b/.claude/skills/humanizer new file mode 120000 index 00000000..796cf7bd --- /dev/null +++ b/.claude/skills/humanizer @@ -0,0 +1 @@ +../../.agents/skills/humanizer \ No newline at end of file diff --git a/.cline/skills/humanizer b/.cline/skills/humanizer new file mode 120000 index 00000000..796cf7bd --- /dev/null +++ b/.cline/skills/humanizer @@ -0,0 +1 @@ +../../.agents/skills/humanizer \ No newline at end of file diff --git a/.cursor/skills/humanizer b/.cursor/skills/humanizer new file mode 120000 index 00000000..796cf7bd --- /dev/null +++ b/.cursor/skills/humanizer @@ -0,0 +1 @@ +../../.agents/skills/humanizer \ No newline at end of file diff --git a/.windsurf/skills/humanizer b/.windsurf/skills/humanizer new file mode 120000 index 00000000..796cf7bd --- /dev/null +++ b/.windsurf/skills/humanizer @@ -0,0 +1 @@ +../../.agents/skills/humanizer \ No newline at end of file diff --git a/docs/static/img/ralph/banana.webp b/docs/static/img/ralph/banana.webp new file mode 100644 index 00000000..bc879f34 Binary files /dev/null and b/docs/static/img/ralph/banana.webp differ diff --git a/src/cli.ts b/src/cli.ts index c4bf8002..004feba2 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -140,6 +140,7 @@ program '--no-visual-check', 'Disable visual comparison validation (auto-enabled when Figma screenshots exist)' ) + .option('--review', 'Run LLM-powered diff review before commit (catches security/logic issues)') // Swarm mode options .option('--swarm', 'Run with multiple agents in parallel (swarm mode)') .option( diff --git a/src/commands/run.ts b/src/commands/run.ts index cab1fa4d..caf60fb1 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -315,6 +315,8 @@ export interface RunCommandOptions { strategy?: 'race' | 'consensus' | 'pipeline'; // Amp options ampMode?: 'smart' | 'rush' | 'deep'; + // Agent reviewer + review?: boolean; } export async function runCommand( @@ -1490,6 +1492,7 @@ Focus on one task at a time. After completing a task, update IMPLEMENTATION_PLAN visualValidation, figmaScreenshotPaths, ampMode: options.ampMode, + review: options.review, headless, enableSkills: options.autoSkills !== false, }; diff --git a/src/index.ts b/src/index.ts index bfbcf09f..449ff5de 100644 --- a/src/index.ts +++ b/src/index.ts @@ -45,6 +45,8 @@ export { CostTracker, resolveModelPricing } from './loop/cost-tracker.js'; export type { IterationUpdate, LoopOptions, LoopResult } from './loop/executor.js'; export { runLoop } from './loop/executor.js'; export { appendProjectMemory, readProjectMemory } from './loop/memory.js'; +export type { ReviewFinding, ReviewResult, ReviewSeverity } from './loop/reviewer.js'; +export { runReview } from './loop/reviewer.js'; export type { SwarmAgentResult, SwarmConfig, SwarmResult, SwarmStrategy } from './loop/swarm.js'; export { runSwarm } from './loop/swarm.js'; export { detectValidationCommands, runAllValidations, runValidation } from './loop/validation.js'; diff --git a/src/loop/executor.ts b/src/loop/executor.ts index 377abcee..2f3f0ae3 100644 --- a/src/loop/executor.ts +++ b/src/loop/executor.ts @@ -38,6 +38,7 @@ import { estimateLoop, formatEstimateDetailed } from './estimator.js'; import { appendProjectMemory, formatMemoryPrompt, readProjectMemory } from './memory.js'; import { checkFileBasedCompletion, createProgressTracker, type ProgressEntry } from './progress.js'; import { RateLimiter } from './rate-limiter.js'; +import { formatReviewAsValidation, formatReviewFeedback, runReview } from './reviewer.js'; import { analyzeResponse, hasExitSignal } from './semantic-analyzer.js'; import { detectClaudeSkills, formatSkillsForPrompt } from './skills.js'; import { detectStepFromOutput } from './step-detector.js'; @@ -270,6 +271,12 @@ export type LoopOptions = { env?: Record; /** Amp agent mode: smart, rush, deep */ ampMode?: import('./agents.js').AmpMode; + /** Run LLM-powered diff review after validation passes (before commit) */ + review?: boolean; + /** Product name shown in logs/UI (default: 'Ralph-Starter'). Set to white-label when embedding. */ + productName?: string; + /** Dot-directory for memory/iteration-log/activity (default: '.ralph'). */ + dotDir?: string; /** API key for SDK-based agents */ apiKey?: string; /** Allow the anthropic-sdk agent to execute shell commands. Disabled by default for safety. */ @@ -406,13 +413,14 @@ function appendIterationLog( iteration: number, summary: string, validationPassed: boolean, - hasChanges: boolean + hasChanges: boolean, + dotDir = '.ralph' ): void { try { - const ralphDir = join(cwd, '.ralph'); - if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + const stateDir = join(cwd, dotDir); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); - const logPath = join(ralphDir, 'iteration-log.md'); + const logPath = join(stateDir, 'iteration-log.md'); const entry = `## Iteration ${iteration} - Status: ${validationPassed ? 'validation passed' : 'validation failed'} - Changes: ${hasChanges ? 'yes' : 'no files changed'} @@ -428,9 +436,13 @@ function appendIterationLog( * Read the last N iteration summaries from .ralph/iteration-log.md. * Used by context-builder to give the agent memory of previous iterations. */ -export function readIterationLog(cwd: string, maxEntries = 3): string | undefined { +export function readIterationLog( + cwd: string, + maxEntries = 3, + dotDir = '.ralph' +): string | undefined { try { - const logPath = join(cwd, '.ralph', 'iteration-log.md'); + const logPath = join(cwd, dotDir, 'iteration-log.md'); if (!existsSync(logPath)) return undefined; const content = readFileSync(logPath, 'utf-8'); @@ -508,6 +520,9 @@ export async function runLoop(options: LoopOptions): Promise { isSpinning: false, } : ora(); + const productName = options.productName || 'Ralph-Starter'; + const dotDir = options.dotDir || '.ralph'; + let maxIterations = options.maxIterations || 50; const commits: string[] = []; const startTime = Date.now(); @@ -528,7 +543,7 @@ export async function runLoop(options: LoopOptions): Promise { // Initialize progress tracker const progressTracker = options.trackProgress - ? createProgressTracker(options.cwd, options.task) + ? createProgressTracker(options.cwd, options.task, dotDir) : null; // Initialize cost tracker @@ -565,10 +580,10 @@ export async function runLoop(options: LoopOptions): Promise { } // Inject project memory from previous runs (if available) - const projectMemory = readProjectMemory(options.cwd); + const projectMemory = readProjectMemory(options.cwd, dotDir); if (projectMemory) { - taskWithSkills = `${taskWithSkills}\n\n${formatMemoryPrompt(projectMemory)}`; - log(chalk.dim(' Project memory loaded from .ralph/memory.md')); + taskWithSkills = `${taskWithSkills}\n\n${formatMemoryPrompt(projectMemory, dotDir)}`; + log(chalk.dim(` Project memory loaded from ${dotDir}/memory.md`)); } // Build abbreviated spec summary for context builder (iterations 2+) @@ -590,7 +605,7 @@ export async function runLoop(options: LoopOptions): Promise { // Show startup summary box const startupLines: string[] = []; - startupLines.push(chalk.cyan.bold(' Ralph-Starter')); + startupLines.push(chalk.cyan.bold(` ${productName}`)); startupLines.push(` Agent: ${chalk.white(options.agent.name)}`); startupLines.push(` Max loops: ${chalk.white(String(maxIterations))}`); if (validationCommands.length > 0) { @@ -876,7 +891,7 @@ export async function runLoop(options: LoopOptions): Promise { // Build iteration-specific task with smart context windowing // Read iteration log for inter-iteration memory (iterations 2+) - const iterationLog = i > 1 ? readIterationLog(options.cwd) : undefined; + const iterationLog = i > 1 ? readIterationLog(options.cwd, 3, dotDir) : undefined; const builtContext = buildIterationContext({ fullTask: options.task, @@ -1503,6 +1518,82 @@ export async function runLoop(options: LoopOptions): Promise { } } + // --- Agent reviewer: LLM-powered diff review before commit --- + if (options.review && hasChanges && i > 1 && pastWarmup) { + spinner.start(chalk.yellow(`Loop ${i}: Running agent review...`)); + try { + const reviewResult = await runReview(options.cwd); + if (reviewResult && !reviewResult.passed) { + const reviewValidation = formatReviewAsValidation(reviewResult); + validationResults.push(reviewValidation); + const feedback = formatReviewFeedback(reviewResult); + spinner.fail( + chalk.red( + `Loop ${i}: Agent review found ${reviewResult.findings.filter((f) => f.severity === 'error').length} error(s)` + ) + ); + for (const f of reviewResult.findings) { + const icon = f.severity === 'error' ? '❌' : f.severity === 'warning' ? '⚠️' : 'ℹ️'; + const location = f.file ? ` (${f.file}${f.line ? `:${f.line}` : ''})` : ''; + log(chalk.dim(` ${icon}${location} ${f.message}`)); + } + + validationFailures++; + const reviewErrorMsg = reviewResult.findings + .map((f) => `[${f.severity}] ${f.file ?? ''} ${f.message}`) + .join('\n'); + const tripped = circuitBreaker.recordFailure(reviewErrorMsg); + if (tripped) { + if (progressTracker && progressEntry) { + progressEntry.status = 'failed'; + progressEntry.summary = `Circuit breaker tripped (agent-review)`; + progressEntry.validationResults = validationResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + finalIteration = i; + exitReason = 'circuit_breaker'; + break; + } + + lastValidationFeedback = feedback; + + if (progressTracker && progressEntry) { + progressEntry.status = 'validation_failed'; + progressEntry.summary = 'Agent review failed'; + progressEntry.validationResults = validationResults; + progressEntry.duration = Date.now() - iterationStart; + await progressTracker.appendEntry(progressEntry); + } + + continue; + } + if (reviewResult) { + const warnFindings = reviewResult.findings.filter((f) => f.severity === 'warning'); + const infoFindings = reviewResult.findings.filter((f) => f.severity === 'info'); + const parts: string[] = []; + if (warnFindings.length > 0) parts.push(`${warnFindings.length} warning(s)`); + if (infoFindings.length > 0) parts.push(`${infoFindings.length} info`); + const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : ''; + spinner.succeed(chalk.green(`Loop ${i}: Agent review passed${suffix}`)); + for (const f of [...warnFindings, ...infoFindings]) { + const icon = f.severity === 'warning' ? '⚠️' : 'ℹ️'; + log(chalk.dim(` ${icon} ${f.message}`)); + } + circuitBreaker.recordSuccess(); + lastValidationFeedback = ''; + } else { + spinner.info(chalk.dim(`Loop ${i}: Agent review skipped (no diff or no LLM key)`)); + } + } catch (err) { + spinner.warn( + chalk.yellow( + `Loop ${i}: Agent review skipped (${err instanceof Error ? err.message : 'unknown error'})` + ) + ); + } + } + // Auto-commit if enabled and there are changes let committed = false; let commitMsg = ''; @@ -1554,7 +1645,7 @@ export async function runLoop(options: LoopOptions): Promise { // Write iteration summary for inter-iteration memory const iterSummary = summarizeChanges(result.output); const iterValidationPassed = validationResults.every((r) => r.success); - appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges); + appendIterationLog(options.cwd, i, iterSummary, iterValidationPassed, hasChanges, dotDir); if (status === 'done') { const completionReason = completionResult.reason || 'Task marked as complete by agent'; @@ -1693,7 +1784,7 @@ export async function runLoop(options: LoopOptions): Promise { if (costTracker) { memorySummary.push(`Cost: ${formatCost(costTracker.getStats().totalCost.totalCost)}`); } - appendProjectMemory(options.cwd, memorySummary.join('\n')); + appendProjectMemory(options.cwd, memorySummary.join('\n'), dotDir); return { success: exitReason === 'completed' || exitReason === 'file_signal', diff --git a/src/loop/memory.ts b/src/loop/memory.ts index b3ffd90a..4c705d3e 100644 --- a/src/loop/memory.ts +++ b/src/loop/memory.ts @@ -15,9 +15,9 @@ const MAX_MEMORY_BYTES = 8 * 1024; // 8KB max — keeps context window usage rea * Read the project memory file. * Returns undefined if no memory exists yet. */ -export function readProjectMemory(cwd: string): string | undefined { +export function readProjectMemory(cwd: string, dotDir = '.ralph'): string | undefined { try { - const memoryPath = join(cwd, '.ralph', MEMORY_FILE); + const memoryPath = join(cwd, dotDir, MEMORY_FILE); if (!existsSync(memoryPath)) return undefined; const content = readFileSync(memoryPath, 'utf-8').trim(); @@ -45,12 +45,12 @@ export function readProjectMemory(cwd: string): string | undefined { /** * Append an entry to the project memory file. */ -export function appendProjectMemory(cwd: string, entry: string): void { +export function appendProjectMemory(cwd: string, entry: string, dotDir = '.ralph'): void { try { - const ralphDir = join(cwd, '.ralph'); - if (!existsSync(ralphDir)) mkdirSync(ralphDir, { recursive: true }); + const stateDir = join(cwd, dotDir); + if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true }); - const memoryPath = join(ralphDir, MEMORY_FILE); + const memoryPath = join(stateDir, MEMORY_FILE); const timestamp = new Date().toISOString().split('T')[0]; const formatted = `## ${timestamp}\n${entry.trim()}\n\n`; @@ -63,13 +63,13 @@ export function appendProjectMemory(cwd: string, entry: string): void { /** * Format memory content as a prompt section for injection into agent context. */ -export function formatMemoryPrompt(memory: string): string { +export function formatMemoryPrompt(memory: string, dotDir = '.ralph'): string { return `## Project Memory (from previous runs) -The following notes were saved from previous ralph-starter runs on this project. +The following notes were saved from previous runs on this project. Use them to understand project conventions and avoid repeating mistakes. ${memory} -If you discover new project conventions or important patterns, append them to \`.ralph/memory.md\`. +If you discover new project conventions or important patterns, append them to \`${dotDir}/memory.md\`. `; } diff --git a/src/loop/progress.ts b/src/loop/progress.ts index e00e04a5..ce13e918 100644 --- a/src/loop/progress.ts +++ b/src/loop/progress.ts @@ -21,7 +21,7 @@ export interface ProgressTracker { clear(): Promise; } -const ACTIVITY_FILE = '.ralph/activity.md'; +const DEFAULT_ACTIVITY_DIR = '.ralph'; /** * Format a progress entry as markdown @@ -128,8 +128,12 @@ function getFileHeader(task: string): string { /** * Create a progress tracker for a directory */ -export function createProgressTracker(cwd: string, task: string): ProgressTracker { - const filePath = path.join(cwd, ACTIVITY_FILE); +export function createProgressTracker( + cwd: string, + task: string, + dotDir = DEFAULT_ACTIVITY_DIR +): ProgressTracker { + const filePath = path.join(cwd, dotDir, 'activity.md'); const dirPath = path.dirname(filePath); let initialized = false; diff --git a/src/loop/reviewer.ts b/src/loop/reviewer.ts new file mode 100644 index 00000000..3591654a --- /dev/null +++ b/src/loop/reviewer.ts @@ -0,0 +1,221 @@ +/** + * Agent Reviewer — LLM-powered diff review step for the executor loop. + * + * Slots into the validation pipeline after lint/build/test pass but before commit. + * Analyzes the current git diff and returns structured feedback that can be fed + * back into the next iteration via lastValidationFeedback. + */ + +import { execa } from 'execa'; +import { tryCallLLM } from '../llm/api.js'; +import type { ValidationResult } from './validation.js'; + +/** Maximum diff size in characters to send to the LLM (avoid context overflow) */ +const MAX_DIFF_CHARS = 30_000; + +/** Review severity levels */ +export type ReviewSeverity = 'error' | 'warning' | 'info'; + +export type ReviewFinding = { + severity: ReviewSeverity; + message: string; + file?: string; + line?: number; +}; + +export type ReviewResult = { + passed: boolean; + findings: ReviewFinding[]; + model?: string; + /** Raw LLM response for debugging */ + raw?: string; +}; + +/** + * Get all working-tree changes as a single coherent diff. + * Prefers `git diff HEAD` (covers staged + unstaged in one pass). + * Falls back to combining staged + unstaged for repos with no commits. + */ +async function getDiff(cwd: string): Promise { + // Register untracked files so git diff HEAD includes new files + await execa('git', ['add', '--intent-to-add', '--all'], { cwd, reject: false }); + + const head = await execa('git', ['diff', 'HEAD'], { cwd, reject: false }); + + // Clean up: remove intent-to-add entries so the index isn't permanently mutated + await execa('git', ['restore', '--staged', '.'], { cwd, reject: false }); + + if (head.stdout.trim()) return head.stdout; + + // Fallback for repos with no commits yet: combine staged + unstaged + const [staged, unstaged] = await Promise.all([ + execa('git', ['diff', '--cached'], { cwd, reject: false }), + execa('git', ['diff'], { cwd, reject: false }), + ]); + return [staged.stdout, unstaged.stdout].filter(Boolean).join('\n'); +} + +const REVIEW_SYSTEM_PROMPT = `You are an expert code reviewer embedded in an automated coding loop. Your job is to review a git diff and catch issues that lint, build, and test checks cannot detect. + +Focus on: +1. **Security**: Hardcoded secrets, SQL injection, XSS, insecure defaults, exposed API keys +2. **Logic errors**: Off-by-one, race conditions, null/undefined access, wrong operator +3. **Pattern violations**: Not following the codebase's existing patterns/conventions +4. **Missing error handling**: Unhandled promises, missing try/catch at boundaries +5. **Performance**: Obvious N+1 queries, missing indexes, unbounded loops/allocations + +Do NOT flag: +- Style issues (handled by lint) +- Type errors (handled by build/typecheck) +- Missing tests (handled by test runner) +- Minor naming preferences + +Respond with a JSON array of findings. Each finding has: +- "severity": "error" | "warning" | "info" +- "message": concise description of the issue +- "file": the affected file path (if identifiable from the diff) +- "line": the relevant line number in the file (if identifiable from the diff) + +If the diff looks clean, respond with an empty array: [] + +Respond ONLY with the JSON array, no markdown fences or explanation.`; + +/** + * Run the agent reviewer on the current diff. + * Returns null if no diff exists or no LLM provider is available. + */ +export async function runReview(cwd: string): Promise { + const diff = await getDiff(cwd); + if (!diff.trim()) return null; + + // Truncate large diffs at a line boundary to avoid malformed patch hunks + const truncatedDiff = (() => { + if (diff.length <= MAX_DIFF_CHARS) return diff; + const cutIndex = diff.lastIndexOf('\n', MAX_DIFF_CHARS); + const slice = cutIndex > 0 ? diff.slice(0, cutIndex) : diff.slice(0, MAX_DIFF_CHARS); + return `${slice}\n\n... (diff truncated at ${MAX_DIFF_CHARS} chars, ${diff.length} total)`; + })(); + + const response = await tryCallLLM({ + system: REVIEW_SYSTEM_PROMPT, + prompt: `Review this diff:\n\n\`\`\`diff\n${truncatedDiff}\n\`\`\``, + maxTokens: 2048, + }); + + if (!response) return null; + + // Parse JSON findings from the response — null means LLM returned unparseable output + const findings = parseFindings(response.content); + if (!findings) return null; + + const hasErrors = findings.some((f) => f.severity === 'error'); + + return { + passed: !hasErrors, + findings, + model: response.model, + raw: response.content, + }; +} + +/** + * Parse review findings from LLM response. + * Handles both raw JSON arrays and markdown-fenced JSON. + */ +function parseFindings(content: string): ReviewFinding[] | null { + // Strip markdown code fences if present + const cleaned = content + .replace(/^```(?:json)?\s*\n?/m, '') + .replace(/\n?```\s*$/m, '') + .trim(); + + try { + const parsed = JSON.parse(cleaned); + if (!Array.isArray(parsed)) return null; + + // Empty array = clean review + if (parsed.length === 0) return []; + + const valid = parsed + .filter( + (f: unknown): f is { severity: string; message: string; file?: string; line?: number } => + typeof f === 'object' && + f !== null && + 'severity' in f && + 'message' in f && + typeof (f as Record).message === 'string' + ) + .map((f) => ({ + severity: (['error', 'warning', 'info'].includes(f.severity) + ? f.severity + : 'warning') as ReviewSeverity, + message: f.message, + file: typeof f.file === 'string' ? f.file : undefined, + line: typeof f.line === 'number' ? f.line : undefined, + })); + + // Non-empty array but all items malformed = indeterminate + if (valid.length === 0) return null; + + return valid; + } catch { + // LLM returned non-JSON — caller should treat as indeterminate (skipped), not passed + return null; + } +} + +/** + * Format review findings as a ValidationResult for the executor loop. + * This allows the reviewer to slot into the existing validation pipeline. + */ +export function formatReviewAsValidation(result: ReviewResult): ValidationResult { + if (result.passed && result.findings.length === 0) { + return { + success: true, + command: 'agent-review', + output: 'No issues found', + }; + } + + const lines: string[] = []; + for (const finding of result.findings) { + const icon = finding.severity === 'error' ? '❌' : finding.severity === 'warning' ? '⚠️' : 'ℹ️'; + const location = finding.file + ? ` (${finding.file}${finding.line ? `:${finding.line}` : ''})` + : ''; + lines.push(`${icon} [${finding.severity.toUpperCase()}]${location}: ${finding.message}`); + } + + return { + success: result.passed, + command: 'agent-review', + output: lines.join('\n'), + ...(result.passed ? {} : { error: lines.join('\n') }), + }; +} + +/** + * Format review findings as feedback text for the lastValidationFeedback mechanism. + */ +export function formatReviewFeedback(result: ReviewResult): string { + if (result.findings.length === 0) return ''; + + const hasFailed = !result.passed; + const feedback = [hasFailed ? '## Agent Review Failed\n' : '## Agent Review Warnings\n']; + feedback.push( + `The automated code reviewer (${result.model || 'unknown'}) found issues in your changes:\n` + ); + + for (const finding of result.findings) { + const location = finding.file + ? ` in \`${finding.file}${finding.line ? `:${finding.line}` : ''}\`` + : ''; + feedback.push(`- **${finding.severity.toUpperCase()}**${location}: ${finding.message}`); + } + + if (hasFailed) { + feedback.push('\nPlease fix the errors above before continuing.'); + } + + return feedback.join('\n'); +}