Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 61 additions & 14 deletions src/__tests__/agent-runner.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
import { describe, it } from "node:test";
import { describe, it, before, after } from "node:test";
import assert from "node:assert/strict";
import { AgentRunner } from "../agent-runner.js";
import { createTask } from "../types.js";
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";

describe("AgentRunner", () => {
// Temp directories for language detection tests. Each directory contains a
// single marker file (tsconfig.json, pyproject.toml or package.json).
let tsDir: string;
let pyDir: string;
let jsDir: string;

// Every directory actually created so far; cleanup iterates this list so it
// never touches a path that was not assigned.
const createdDirs: string[] = [];

/**
 * Creates a fresh temp directory holding one marker file.
 *
 * @param prefix - Prefix handed to `mkdtempSync` under the OS temp dir.
 * @param markerFile - Name of the file written inside the new directory.
 * @param contents - Text written into the marker file.
 * @returns The absolute path of the created directory.
 */
const makeProjectDir = (prefix: string, markerFile: string, contents: string): string => {
  const dir = mkdtempSync(join(tmpdir(), prefix));
  // Record before writing so the directory is still cleaned up if the write fails.
  createdDirs.push(dir);
  writeFileSync(join(dir, markerFile), contents);
  return dir;
};

before(() => {
  tsDir = makeProjectDir("test-ts-", "tsconfig.json", "{}");
  pyDir = makeProjectDir("test-py-", "pyproject.toml", "");
  jsDir = makeProjectDir("test-js-", "package.json", "{}");
});

after(() => {
  // If `before` threw part-way, some of tsDir/pyDir/jsDir are still undefined;
  // removing only the recorded directories avoids calling rmSync on those.
  for (const dir of createdDirs) {
    rmSync(dir, { recursive: true, force: true });
  }
});

// ── estimateCost ──

it("estimateCost returns correct values for sonnet model", () => {
Expand Down Expand Up @@ -55,11 +78,36 @@ describe("AgentRunner", () => {

// ── buildSystemPrompt ──

it("buildSystemPrompt includes tsc instruction for TypeScript projects", () => {
  // tsDir contains a tsconfig.json, so the TypeScript-specific instructions are expected.
  const runner = new AgentRunner();
  const task = createTask("fix the bug");
  const prompt = runner.buildSystemPrompt(task, tsDir);
  assert.ok(prompt.includes("npx tsc"), "prompt should include npx tsc instruction for TS projects");
});

it("buildSystemPrompt includes python instructions for Python projects", () => {
  // pyDir contains a pyproject.toml, so the Python branch of the prompt is expected.
  const systemPrompt = new AgentRunner().buildSystemPrompt(createTask("fix the bug"), pyDir);
  const mentions = (needle: string): boolean => systemPrompt.includes(needle);

  assert.ok(mentions("test suite"), "prompt should include test suite instruction for Python projects");
  assert.ok(mentions("linter"), "prompt should include linter instruction for Python projects");
  assert.ok(!mentions("npx tsc"), "prompt should not include tsc for Python projects");
});

it("buildSystemPrompt includes npm test instruction for JavaScript projects", () => {
  // jsDir contains only a package.json, so the JavaScript branch of the prompt is expected.
  const agent = new AgentRunner();
  const bugfixTask = createTask("fix the bug");
  const text = agent.buildSystemPrompt(bugfixTask, jsDir);

  const hasNpmTest = text.includes("npm test");
  const hasTsc = text.includes("npx tsc");

  assert.ok(hasNpmTest, "prompt should include npm test for JS projects");
  assert.ok(hasTsc === false, "prompt should not include tsc for JS projects");
});

it("buildSystemPrompt includes only commit instruction for unknown projects", () => {
  // The path does not exist, so no marker file is found and the language is "unknown".
  const runner = new AgentRunner();
  const task = createTask("fix the bug");
  const prompt = runner.buildSystemPrompt(task, "/nonexistent-path-xyz");
  assert.ok(prompt.includes("git add -A"), "prompt should include commit instruction");
  assert.ok(!prompt.includes("npx tsc"), "prompt should not include tsc for unknown projects");
});

it("buildSystemPrompt includes test runner hints for test-related tasks", () => {
Expand Down Expand Up @@ -188,11 +236,10 @@ describe("AgentRunner", () => {
const runner = new AgentRunner();
const task = createTask("hello", { agent: "echo", timeout: 5 });
await runner.run(task, "/tmp");
// echo succeeds → output captured; verifyBuild fails (no tsconfig in /tmp) → status "failed" with [TSC_FAILED]
// Validates: (1) generic agent runs and captures output, (2) build verification is enforced
// echo succeeds → output captured; verifyBuild skips tsc (no tsconfig in /tmp) → status "success"
// Validates: (1) generic agent runs and captures output, (2) build verification is skipped for non-TS
assert.ok(task.output.includes("hello"), "output should contain the prompt text from echo");
assert.strictEqual(task.status, "failed", "should fail due to tsc verification in /tmp");
assert.ok(task.output.startsWith("[TSC_FAILED]"), "output should be prefixed with [TSC_FAILED]");
assert.strictEqual(task.status, "success", "should succeed since /tmp is not a TS project");
assert.ok(task.durationMs > 0, "durationMs should be recorded");
});

Expand Down Expand Up @@ -253,13 +300,13 @@ describe("AgentRunner", () => {
assert.strictEqual(typeof result.errors, "string");
});

it("verifyBuild skips tsc for non-TypeScript projects", async () => {
  const runner = new AgentRunner();
  // verifyBuild is private; the structural cast exposes it to the test without changing the class.
  const verify = (runner as unknown as { verifyBuild: (cwd: string) => Promise<{ ok: boolean; errors: string }> }).verifyBuild.bind(runner);
  // /tmp has no tsconfig.json, so verifyBuild should skip tsc and return ok
  const result = await verify("/tmp");
  assert.strictEqual(result.ok, true, "should return ok for non-TS projects");
  assert.strictEqual(result.errors, "", "should have no errors for non-TS projects");
});

// ── buildSystemPrompt edge cases ──
Expand All @@ -269,15 +316,15 @@ describe("AgentRunner", () => {
const task = createTask("fix something");
// Use actual project root where CLAUDE.md exists
const prompt = runner.buildSystemPrompt(task, process.cwd() + "/../..");
// CLAUDE.md in project root has "## Development Rules" section
assert.ok(prompt.includes(".js"), "should include .js extension rule");
// Prompt should always include the commit instruction regardless of language
assert.ok(prompt.includes("git add -A"), "should include commit instruction");
});

it("buildSystemPrompt works without CLAUDE.md file", () => {
  // A nonexistent directory has neither CLAUDE.md nor any language marker file,
  // so only the language-independent commit instruction can be relied on.
  const runner = new AgentRunner();
  const task = createTask("fix something");
  const prompt = runner.buildSystemPrompt(task, "/nonexistent-path");
  assert.ok(prompt.includes("git add -A"), "should include commit instruction even without CLAUDE.md");
  assert.ok(prompt.length > 0);
});

Expand Down
74 changes: 55 additions & 19 deletions src/agent-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { log } from "./logger.js";
import { spawn, type ChildProcess } from "node:child_process";
import { exec as execCb } from "node:child_process";
import { promisify } from "node:util";
import { readFileSync } from "node:fs";
import { readFileSync, existsSync } from "node:fs";

const execAsync = promisify(execCb);

Expand Down Expand Up @@ -114,6 +114,13 @@ export class AgentRunner {
return env;
}

/**
 * Classifies the project at `cwd` by probing for marker files directly inside it.
 *
 * Checks run in priority order: `tsconfig.json` (typescript), then
 * `pyproject.toml` / `setup.py` / `setup.cfg` (python), then `package.json`
 * (javascript). The first match wins; with no match the result is "unknown".
 *
 * @param cwd - Directory to inspect.
 * @returns The detected language tag.
 */
private detectLanguage(cwd: string): "typescript" | "javascript" | "python" | "unknown" {
  const hasMarker = (fileName: string): boolean => existsSync(`${cwd}/${fileName}`);

  if (hasMarker("tsconfig.json")) {
    return "typescript";
  }

  const pythonMarkers = ["pyproject.toml", "setup.py", "setup.cfg"];
  if (pythonMarkers.some((fileName) => hasMarker(fileName))) {
    return "python";
  }

  return hasMarker("package.json") ? "javascript" : "unknown";
}

buildSystemPrompt(task: Task, cwd: string = process.cwd()): string {
const parts: string[] = [];

Expand All @@ -128,9 +135,17 @@ export class AgentRunner {
// CLAUDE.md not found – skip gracefully
}

// Always-included instructions
parts.push("- Always use `.js` extensions in import paths (e.g. `import { foo } from \"./bar.js\"`).");
parts.push("- After making changes, run `npx tsc` to verify there are no type errors.");
// Language-aware instructions
const lang = this.detectLanguage(cwd);
if (lang === "typescript") {
parts.push("- Always use `.js` extensions in import paths (e.g. `import { foo } from \"./bar.js\"`).");
parts.push("- After making changes, run `npx tsc` to verify there are no type errors.");
} else if (lang === "python") {
parts.push("- After making changes, run the project's test suite to verify.");
parts.push("- Use the project's existing linter (ruff, flake8, etc.) if configured.");
} else if (lang === "javascript") {
parts.push("- After making changes, run `npm test` if configured.");
}
parts.push("- Stage and commit all changes with `git add -A && git commit -m \"feat: <brief summary>\"`.");

// Conditional: test or spec
Expand Down Expand Up @@ -241,7 +256,7 @@ export class AgentRunner {
}

const sysPrompt = this.buildSystemPrompt(task, cwd);
const fullPrompt = this.buildTaskPrompt(task);
const fullPrompt = this.buildTaskPrompt(task, cwd);
const env = this.cleanEnv();

const ac = new AbortController();
Expand Down Expand Up @@ -294,7 +309,7 @@ export class AgentRunner {
private runClaude(task: Task, cwd: string, startMs: number, onEvent?: EventCallback): Promise<void> {
return new Promise((resolve, reject) => {
const sysPrompt = this.buildSystemPrompt(task, cwd);
const fullPrompt = this.buildTaskPrompt(task);
const fullPrompt = this.buildTaskPrompt(task, cwd);

const args = [
"-p",
Expand Down Expand Up @@ -431,7 +446,7 @@ export class AgentRunner {
/** Run task using Codex CLI (exec mode with JSON output). */
private runCodex(task: Task, cwd: string, startMs: number, onEvent?: EventCallback): Promise<void> {
return new Promise((resolve, reject) => {
const fullPrompt = this.buildTaskPrompt(task);
const fullPrompt = this.buildTaskPrompt(task, cwd);

const args = [
"exec",
Expand Down Expand Up @@ -545,7 +560,7 @@ export class AgentRunner {
/** Run task using any generic CLI command. The prompt is appended as the last argument. */
private runGeneric(task: Task, cwd: string, agentCmd: string, startMs: number, onEvent?: EventCallback): Promise<void> {
return new Promise((resolve, reject) => {
const fullPrompt = this.buildTaskPrompt(task);
const fullPrompt = this.buildTaskPrompt(task, cwd);

// Split the agent command on whitespace: e.g. "aider --yes" → ["aider", "--yes"]
const parts = agentCmd.split(/\s+/).filter(Boolean);
Expand Down Expand Up @@ -598,22 +613,43 @@ export class AgentRunner {
}

/**
 * Build the full task prompt with instructions appended.
 *
 * The result is the task's own prompt, a `---` separator, and an
 * "## Instructions" list. The list always starts with the minimal-changes
 * rule; the remaining items depend on the language detected for `cwd`, and
 * every variant ends with a commit instruction.
 *
 * @param task - Task whose `prompt` text opens the result.
 * @param cwd - Project directory used for language detection; defaults to the process working directory.
 * @returns The prompt lines joined with newlines.
 */
private buildTaskPrompt(task: Task, cwd: string = process.cwd()): string {
  const lang = this.detectLanguage(cwd);
  const lines: string[] = [
    `${task.prompt}`,
    "",
    "---",
    "",
    "## Instructions",
    "",
    "- **Minimal changes**: Only modify what is necessary to complete the task. Do not refactor, reformat, or touch unrelated code.",
  ];

  if (lang === "typescript") {
    lines.push("- **TypeScript imports**: Always use `.js` extensions in import paths (e.g. `import { foo } from \"./bar.js\"`).");
    lines.push("- **Type checking**: After making changes, run `npx tsc` to catch type errors.");
    lines.push("- **Fix before committing**: If `npx tsc` fails, fix all errors before proceeding to commit.");
    lines.push("- **Commit when done**: Stage and commit all changes with `git add -A && git commit -m \"feat: <brief summary>\"`.");
  } else if (lang === "python") {
    lines.push("- **Python project**: Use the project's build system (pyproject.toml / setup.py).");
    lines.push("- **Verification**: Run relevant tests to verify changes work.");
    lines.push("- **Commit when done**: Stage and commit with `git add -A && git commit -m \"feat: <brief summary>\"`.");
  } else if (lang === "javascript") {
    lines.push("- **Verification**: Run `npm test` if configured.");
    lines.push("- **Commit when done**: Stage and commit with `git add -A && git commit -m \"feat: <brief summary>\"`.");
  } else {
    // Unknown project type: no language-specific verification step, only the commit instruction.
    lines.push("- **Commit when done**: Stage and commit with `git add -A && git commit -m \"feat: <brief summary>\"`.");
  }

  return lines.join("\n");
}

/** Async build verification — does not block the event loop. */
private async verifyBuild(cwd: string): Promise<{ ok: boolean; errors: string }> {
const lang = this.detectLanguage(cwd);
if (lang !== "typescript") {
return { ok: true, errors: "" };
}
try {
await execAsync("npx tsc --noEmit", { cwd, encoding: "utf8" });
return { ok: true, errors: "" };
Expand Down