diff --git a/cli/README.md b/cli/README.md index e4a2e199..c235dd0d 100644 --- a/cli/README.md +++ b/cli/README.md @@ -66,6 +66,10 @@ boundaries: Rules apply to all tasks (single or PRD). +Config safety behavior: +- Invalid config files fall back to defaults with a warning. +- Unsafe prototype-pollution keys are rejected during config parsing. + ## AI Engines ```bash diff --git a/cli/src/cli/commands/run.ts b/cli/src/cli/commands/run.ts index e9fc95b7..6482f168 100644 --- a/cli/src/cli/commands/run.ts +++ b/cli/src/cli/commands/run.ts @@ -40,19 +40,19 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!existsSync(options.prdFile)) { logError(`${options.prdFile} not found in current directory`); logInfo(`Create a ${options.prdFile} file with tasks`); - process.exit(1); + throw new Error(`PRD source not found: ${options.prdFile}`); } } else if (options.prdSource === "markdown-folder") { if (!existsSync(options.prdFile)) { logError(`PRD folder ${options.prdFile} not found`); logInfo(`Create a ${options.prdFile}/ folder with markdown files containing tasks`); - process.exit(1); + throw new Error(`PRD folder not found: ${options.prdFile}`); } } if (options.prdSource === "github" && !options.githubRepo) { logError("GitHub repository not specified. Use --github owner/repo"); - process.exit(1); + throw new Error("GitHub repository not specified"); } // Check engine availability @@ -61,7 +61,7 @@ export async function runLoop(options: RuntimeOptions): Promise { if (!available) { logError(`${engine.name} CLI not found. Make sure '${engine.cliCommand}' is in your PATH.`); - process.exit(1); + throw new Error(`${engine.name} CLI not available`); } // Create task source with caching for better performance @@ -91,7 +91,7 @@ export async function runLoop(options: RuntimeOptions): Promise { logError("Cannot run in parallel/branch mode: repository has no commits yet."); logInfo("Please make an initial commit first:"); logInfo(' git add . && git commit -m "Initial commit"'); - process.exit(1); + throw new Error("Repository has no commits yet"); } } @@ -195,6 +195,6 @@ export async function runLoop(options: RuntimeOptions): Promise { } if (result.tasksFailed > 0) { - process.exit(1); + throw new Error(`${result.tasksFailed} task(s) failed`); } } diff --git a/cli/src/cli/commands/task.ts b/cli/src/cli/commands/task.ts index 312977e7..f34d633c 100644 --- a/cli/src/cli/commands/task.ts +++ b/cli/src/cli/commands/task.ts @@ -28,7 +28,7 @@ export async function runTask(task: string, options: RuntimeOptions): Promise(); + const queue: Array<{ value: unknown; depth: number }> = [{ value: obj, depth: 0 }]; + let nodesVisited = 0; + + while (queue.length > 0) { + const current = queue.shift(); + if (!current) continue; + + nodesVisited++; + if (nodesVisited > MAX_NODES) { + throw new Error("Config file too complex to validate safely"); + } + + if (current.depth > MAX_DEPTH) { + throw new Error("Config file nesting exceeds safety limits"); + } + + if (typeof current.value !== "object" || current.value === null) { + continue; + } + + if (visited.has(current.value)) { + continue; + } + visited.add(current.value); + + for (const key of Object.keys(current.value)) { + if (dangerousKeys.has(key)) return true; + const value = (current.value as Record)[key]; + queue.push({ value, depth: current.depth + 1 }); + } + } + + return false; +} export const CONFIG_FILE = "config.yaml"; export const PROGRESS_FILE = "progress.txt"; @@ -48,10 +97,27 @@ export function loadConfig(workDir = process.cwd()): RalphyConfig | null { try { const content = readFileSync(configPath, "utf-8"); const parsed = YAML.parse(content); + + // BUG FIX: Proper prototype pollution protection with recursive check + // The old string-based check was bypassable via Unicode escapes + if (hasPrototypePollution(parsed)) { + throw new Error("Config file contains potentially malicious prototype pollution keys"); + } + return RalphyConfigSchema.parse(parsed); } catch (error) { - // Log error for debugging, but return default config - console.error(`Warning: Failed to parse config at ${configPath}:`, error); + const message = error instanceof Error ? error.message : String(error); + const isSecurityError = + message.includes("too complex") || + message.includes("nesting exceeds") || + message.includes("prototype pollution"); + if (isSecurityError) { + logError(`Config security violation at ${configPath}: ${message}. Refusing to load config.`); + return null; + } + + logWarn(`Invalid config file at ${configPath}: ${message}. Falling back to defaults.`); + logDebug(`Config parse stack: ${error instanceof Error ? error.stack || message : message}`); return RalphyConfigSchema.parse({}); } } @@ -69,7 +135,7 @@ export function loadRules(workDir = process.cwd()): string[] { */ export function loadBoundaries(workDir = process.cwd()): string[] { const config = loadConfig(workDir); - return config?.boundaries.never_touch ?? []; + return config?.boundaries?.never_touch ?? []; } /** @@ -77,7 +143,14 @@ export function loadBoundaries(workDir = process.cwd()): string[] { */ export function loadTestCommand(workDir = process.cwd()): string { const config = loadConfig(workDir); - return config?.commands.test ?? ""; + const command = config?.commands.test ?? ""; + + if (command && !validateCommand(command)) { + logWarn(`Invalid test command in config: "${command}". Falling back to default.`); + return ""; + } + + return command; } /** @@ -85,7 +158,14 @@ export function loadTestCommand(workDir = process.cwd()): string { */ export function loadLintCommand(workDir = process.cwd()): string { const config = loadConfig(workDir); - return config?.commands.lint ?? ""; + const command = config?.commands.lint ?? ""; + + if (command && !validateCommand(command)) { + logWarn(`Invalid lint command in config: "${command}". Falling back to default.`); + return ""; + } + + return command; } /** @@ -93,7 +173,14 @@ export function loadLintCommand(workDir = process.cwd()): string { */ export function loadBuildCommand(workDir = process.cwd()): string { const config = loadConfig(workDir); - return config?.commands.build ?? ""; + const command = config?.commands.build ?? ""; + + if (command && !validateCommand(command)) { + logWarn(`Invalid build command in config: "${command}". Falling back to default.`); + return ""; + } + + return command; } /** diff --git a/cli/src/config/types.ts b/cli/src/config/types.ts index 6a292aa9..22c8f2b3 100644 --- a/cli/src/config/types.ts +++ b/cli/src/config/types.ts @@ -17,6 +17,7 @@ export const NotificationsSchema = z.object({ discord_webhook: z.string().default(""), slack_webhook: z.string().default(""), custom_webhook: z.string().default(""), + telemetry_webhook: z.string().default(""), }); /** @@ -109,6 +110,8 @@ export interface RuntimeOptions { browserEnabled: "auto" | "true" | "false"; /** Override default model for the engine */ modelOverride?: string; + /** Enable comprehensive OpenCode debugging */ + debugOpenCode?: boolean; /** Skip automatic branch merging after parallel execution */ skipMerge?: boolean; /** Use lightweight sandboxes instead of git worktrees for parallel execution */ @@ -142,4 +145,5 @@ export const DEFAULT_OPTIONS: RuntimeOptions = { githubLabel: "", autoCommit: true, browserEnabled: "auto", + debugOpenCode: false, }; diff --git a/cli/src/config/writer.ts b/cli/src/config/writer.ts index 294df594..1a684706 100644 --- a/cli/src/config/writer.ts +++ b/cli/src/config/writer.ts @@ -1,6 +1,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { appendFile } from "node:fs/promises"; import YAML from "yaml"; +import { logWarn } from "../ui/logger.ts"; import { detectProject } from "./detector.ts"; import { getConfigPath, getProgressPath, getRalphyDir } from "./loader.ts"; import type { RalphyConfig } from "./types.ts"; @@ -35,7 +36,7 @@ rules: # - "Use server actions instead of API routes in Next.js" # # Skills/playbooks (optional): - # - "Before coding, read and follow any relevant skill/playbook docs under .opencode/skills, .claude/skills, or .github/skills." + # - "Before coding, read and follow any relevant skill/playbook docs under .opencode/skills or .claude/skills." # Boundaries - files/folders the AI should not modify boundaries: @@ -49,9 +50,17 @@ boundaries: /** * Escape a value for safe YAML string + * BUG FIX: Use YAML library for proper escaping to prevent injection attacks */ function escapeYaml(value: string | undefined | null): string { - return (value || "").replace(/"/g, '\\"'); + if (!value) return ""; + // Keep the value safe inside an existing double-quoted scalar. + // This prevents quote breakout and newline-based injection. + return value + .replace(/\\/g, "\\\\") + .replace(/"/g, '\\"') + .replace(/\r/g, "\\r") + .replace(/\n/g, "\\n"); } /** @@ -108,44 +117,8 @@ export function addRule(rule: string, workDir = process.cwd()): void { writeFileSync(configPath, YAML.stringify(parsed), "utf-8"); } -/** Queue for batching progress writes */ -const progressWriteQueue: Map = new Map(); -let flushTimeout: ReturnType | null = null; - -/** - * Flush all pending progress writes to disk - */ -async function flushProgressWrites(): Promise { - if (progressWriteQueue.size === 0) return; - - const entries = [...progressWriteQueue.entries()]; - progressWriteQueue.clear(); - flushTimeout = null; - - for (const [path, lines] of entries) { - try { - await appendFile(path, lines.join(""), "utf-8"); - } catch { - // Ignore write errors for progress logging - } - } -} - -/** - * Schedule a flush of progress writes (debounced) - */ -function scheduleFlush(): void { - if (flushTimeout) return; - flushTimeout = setTimeout(() => { - void flushProgressWrites(); - }, 100); // Batch writes within 100ms window -} - /** - * Log a task to the progress file (async, batched) - * - * Performance optimized: uses async I/O and batches writes within 100ms windows - * to reduce file system contention in parallel mode. + * Log a task to the progress file */ export function logTaskProgress( task: string, @@ -162,23 +135,7 @@ export function logTaskProgress( const icon = status === "completed" ? "✓" : "✗"; const line = `- [${icon}] ${timestamp} - ${task}\n`; - // Add to write queue - const existing = progressWriteQueue.get(progressPath) || []; - existing.push(line); - progressWriteQueue.set(progressPath, existing); - - // Schedule async flush - scheduleFlush(); -} - -/** - * Force flush all pending progress writes immediately - * Call this before process exit to ensure all writes are persisted - */ -export async function flushAllProgressWrites(): Promise { - if (flushTimeout) { - clearTimeout(flushTimeout); - flushTimeout = null; - } - await flushProgressWrites(); + void appendFile(progressPath, line, "utf-8").catch((error) => { + logWarn(`Failed to append task progress: ${error}`); + }); } diff --git a/cli/src/engines/validation.test.ts b/cli/src/engines/validation.test.ts new file mode 100644 index 00000000..eeba28c3 --- /dev/null +++ b/cli/src/engines/validation.test.ts @@ -0,0 +1,17 @@ +import { describe, expect, it } from "bun:test"; +import { validateArgs } from "./validation.ts"; + +describe("validateArgs", () => { + it("rejects redirect-like tokens", () => { + expect(validateArgs(["--out", ">", "file.txt"])).toBeNull(); + expect(validateArgs([""])).toBeNull(); + }); + + it("accepts normal safe arguments", () => { + expect(validateArgs(["--model", "gpt-5", "--verbose"])).toEqual([ + "--model", + "gpt-5", + "--verbose", + ]); + }); +}); diff --git a/cli/src/engines/validation.ts b/cli/src/engines/validation.ts new file mode 100644 index 00000000..db6b29eb --- /dev/null +++ b/cli/src/engines/validation.ts @@ -0,0 +1,175 @@ +import { logDebug } from "../ui/logger.ts"; + +// Check platform +const isWindows = process.platform === "win32"; +const DEBUG = process.env.RALPHY_DEBUG === "true"; + +/** + * Maximum lengths to prevent DoS attacks + */ +const MAX_COMMAND_LENGTH = 1000; +const MAX_ARG_LENGTH = 10000; +const MAX_TOTAL_ARGS_LENGTH = 100000; +const MAX_ARG_COUNT = 1000; + +function debugLog(...args: unknown[]): void { + if (DEBUG || (globalThis as { verboseMode?: boolean }).verboseMode === true) { + logDebug(args.map((a) => String(a)).join(" ")); + } +} + +/** + * Validate command name to prevent command injection + * Only allows alphanumeric characters, hyphens, underscores, and dots + * Also allows forward slashes for path-based commands (e.g., ./node_modules/.bin/cli) + */ +function tokenizeCommand(command: string): string[] { + const tokens: string[] = []; + const regex = /[^\s"']+|"([^"]*)"|'([^']*)'/g; + let match = regex.exec(command); + while (match !== null) { + tokens.push(match[1] ?? match[2] ?? match[0]); + match = regex.exec(command); + } + + return tokens; +} + +export function validateCommand(command: string): string | null { + const trimmedCommand = command.trim(); + if (!trimmedCommand) { + debugLog("Command validation failed: command is empty"); + return null; + } + + // Check command length to prevent DoS + if (trimmedCommand.length > MAX_COMMAND_LENGTH) { + debugLog( + `Command validation failed: command too long (${trimmedCommand.length} > ${MAX_COMMAND_LENGTH})`, + ); + return null; + } + + // Block shell metacharacters and dangerous patterns + const dangerousPatterns = [ + /[;&|`]/, // Shell metacharacters + /\$\{/, // Variable expansion + /\$\(/, // Command substitution + /`/, // Backtick substitution + /[<>]/, // Redirection + ]; + + for (const pattern of dangerousPatterns) { + if (pattern.test(trimmedCommand)) { + debugLog(`Command validation failed: dangerous pattern detected in "${trimmedCommand}"`); + return null; + } + } + + const tokens = tokenizeCommand(trimmedCommand); + if (tokens.length === 0) { + debugLog("Command validation failed: no command token found"); + return null; + } + + const [commandToken, ...args] = tokens; + + // Allow executable characters: alphanumeric, hyphen, underscore, dot, slashes. + // Windows also needs drive-letter colon support (e.g., C:\tools\bun.exe). + const validCommandPattern = isWindows ? /^[a-zA-Z0-9._\-\\/:]+$/ : /^[a-zA-Z0-9._\-/]+$/; + + if (!validCommandPattern.test(commandToken)) { + debugLog(`Command validation failed: invalid command token "${commandToken}"`); + return null; + } + + if (args.length > 0 && !validateArgs(args)) { + debugLog(`Command validation failed: invalid args in "${trimmedCommand}"`); + return null; + } + + return trimmedCommand; +} + +/** + * Validate command arguments to prevent injection + * Returns null if any argument contains dangerous patterns + */ +export function validateArgs(args: string[]): string[] | null { + // Check argument count to prevent DoS + if (args.length > MAX_ARG_COUNT) { + debugLog(`Argument validation failed: too many arguments (${args.length} > ${MAX_ARG_COUNT})`); + return null; + } + + // Check total arguments length + const totalLength = args.reduce((sum, arg) => sum + arg.length, 0); + if (totalLength > MAX_TOTAL_ARGS_LENGTH) { + debugLog( + `Argument validation failed: total arguments too long (${totalLength} > ${MAX_TOTAL_ARGS_LENGTH})`, + ); + return null; + } + + const dangerousPatterns = [ + /[;&|`$]/, // Shell metacharacters (including bare $ for variable expansion) + /\$\{/, // Variable expansion + /\$\(/, // Command substitution + /`/, // Backtick substitution + /[<>]/, // Redirection and XML-like payloads + ]; + + for (const arg of args) { + // Check individual argument length + if (arg.length > MAX_ARG_LENGTH) { + debugLog(`Argument validation failed: argument too long (${arg.length} > ${MAX_ARG_LENGTH})`); + return null; + } + + for (const pattern of dangerousPatterns) { + if (pattern.test(arg)) { + debugLog(`Argument validation failed: dangerous pattern in "${arg}"`); + return null; + } + } + } + + return args; +} + +/** + * Validation result type + */ +export interface ValidationResult { + valid: boolean; + command?: string; + args?: string[]; + error?: string; +} + +/** + * Validate both command and arguments in one call + */ +export function validateCommandAndArgs(command: string, args: string[]): ValidationResult { + const validatedCommand = validateCommand(command); + if (!validatedCommand) { + return { + valid: false, + error: "Invalid command - potential command injection detected", + }; + } + + const validatedArgs = validateArgs(args); + if (!validatedArgs) { + return { + valid: false, + error: "Invalid arguments - potential command injection detected", + }; + } + + return { + valid: true, + command: validatedCommand, + args: validatedArgs, + }; +} diff --git a/cli/src/execution/parallel.ts b/cli/src/execution/parallel.ts index 5318088e..53f2a6a0 100644 --- a/cli/src/execution/parallel.ts +++ b/cli/src/execution/parallel.ts @@ -1,5 +1,5 @@ import { copyFileSync, cpSync, existsSync, mkdirSync } from "node:fs"; -import { join } from "node:path"; +import { dirname, isAbsolute, join, normalize, relative, resolve, sep } from "node:path"; import simpleGit from "simple-git"; import { PROGRESS_FILE, RALPHY_DIR } from "../config/loader.ts"; import { logTaskProgress } from "../config/writer.ts"; @@ -41,6 +41,30 @@ interface ParallelAgentResult { usedSandbox?: boolean; } +function resolveSafeRelativePath(baseDir: string, candidatePath: string): string | null { + if (!candidatePath || isAbsolute(candidatePath)) { + return null; + } + + const normalized = normalize(candidatePath); + const resolved = resolve(baseDir, normalized); + const rel = relative(baseDir, resolved); + + if (rel === "" || rel === ".") { + return normalized; + } + + if (rel.startsWith(`..${sep}`) || rel === "..") { + return null; + } + + if (isAbsolute(rel)) { + return null; + } + + return rel; +} + /** * Run a single agent in a worktree */ @@ -66,6 +90,11 @@ async function runAgentInWorktree( let branchName = ""; try { + const safePrdPath = resolveSafeRelativePath(originalDir, prdFile); + if (!safePrdPath) { + throw new Error(`Invalid PRD path outside project: ${prdFile}`); + } + // Create worktree const worktree = await createAgentWorktree( task.title, @@ -80,16 +109,23 @@ async function runAgentInWorktree( logDebug(`Agent ${agentNum}: Created worktree at ${worktreeDir}`); // Copy PRD file or folder to worktree - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, prdFile); + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = join(originalDir, safePrdPath); + const destPath = join(worktreeDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(worktreeDir, prdFile); + const srcPath = join(originalDir, safePrdPath); + const destPath = join(worktreeDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); cpSync(srcPath, destPath, { recursive: true }); } } @@ -161,6 +197,11 @@ async function runAgentInSandbox( const branchName = ""; try { + const safePrdPath = resolveSafeRelativePath(originalDir, prdFile); + if (!safePrdPath) { + throw new Error(`Invalid PRD path outside project: ${prdFile}`); + } + // Create sandbox const sandboxResult = await createSandbox({ originalDir, @@ -173,16 +214,23 @@ async function runAgentInSandbox( ); // Copy PRD file or folder to sandbox (same as worktree mode) - if (prdSource === "markdown" || prdSource === "yaml" || prdSource === "json") { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); + if ( + prdSource === "markdown" || + prdSource === "yaml" || + prdSource === "json" || + prdSource === "csv" + ) { + const srcPath = join(originalDir, safePrdPath); + const destPath = join(sandboxDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); copyFileSync(srcPath, destPath); } } else if (prdSource === "markdown-folder" && prdIsFolder) { - const srcPath = join(originalDir, prdFile); - const destPath = join(sandboxDir, prdFile); + const srcPath = join(originalDir, safePrdPath); + const destPath = join(sandboxDir, safePrdPath); if (existsSync(srcPath)) { + mkdirSync(dirname(destPath), { recursive: true }); cpSync(srcPath, destPath, { recursive: true }); } } @@ -380,13 +428,13 @@ export async function runParallel( // Run agents in parallel (using sandbox or worktree mode) const promises = batch.map((task) => { - globalAgentNum++; + const agentId = ++globalAgentNum; const runInSandbox = () => runAgentInSandbox( engine, task, - globalAgentNum, + agentId, getSandboxBase(workDir), workDir, prdSource, @@ -408,7 +456,7 @@ export async function runParallel( return runAgentInWorktree( engine, task, - globalAgentNum, + agentId, baseBranch, isolationBase, workDir, @@ -424,7 +472,7 @@ export async function runParallel( engineArgs, ).then((res) => { if (shouldFallbackToSandbox(res.error)) { - logWarn(`Agent ${globalAgentNum}: Worktree unavailable, retrying in sandbox mode.`); + logWarn(`Agent ${agentId}: Worktree unavailable, retrying in sandbox mode.`); if (res.worktreeDir) { cleanupAgentWorktree(res.worktreeDir, res.branchName, workDir).catch(() => { // Ignore cleanup failures during fallback diff --git a/cli/src/execution/retry.ts b/cli/src/execution/retry.ts index 9eb4f293..a9d7707c 100644 --- a/cli/src/execution/retry.ts +++ b/cli/src/execution/retry.ts @@ -34,7 +34,7 @@ export function calculateBackoffDelay( useJitter: boolean, ): number { // Exponential backoff: baseDelay * 2^(attempt-1) - let delay = baseDelayMs * Math.pow(2, attempt - 1); + let delay = baseDelayMs * 2 ** (attempt - 1); // Cap at maximum delay delay = Math.min(delay, maxDelayMs); diff --git a/cli/src/execution/sandbox-git.ts b/cli/src/execution/sandbox-git.ts index 45ceb1e9..b4ee4a85 100644 --- a/cli/src/execution/sandbox-git.ts +++ b/cli/src/execution/sandbox-git.ts @@ -12,7 +12,7 @@ class GitMutex { private queue: Promise = Promise.resolve(); async acquire(fn: () => Promise): Promise { - let release: () => void; + let release: (() => void) | undefined; const next = new Promise((resolve) => { release = resolve; }); @@ -22,7 +22,7 @@ class GitMutex { try { return await fn(); } finally { - release!(); + release?.(); } } } diff --git a/cli/src/execution/sandbox.ts b/cli/src/execution/sandbox.ts index d27e76c9..00a9252e 100644 --- a/cli/src/execution/sandbox.ts +++ b/cli/src/execution/sandbox.ts @@ -32,21 +32,27 @@ export async function rmRF(path: string): Promise { // Using force: true and recursive: true is standard rmSync(path, { recursive: true, force: true }); return; - } catch (err: any) { - const isLockError = err.code === "EBUSY" || err.code === "EPERM" || err.code === "ENOTEMPTY"; + } catch (err: unknown) { + const errorCode = + typeof err === "object" && err !== null && "code" in err + ? String((err as { code?: string }).code) + : ""; + const isLockError = + errorCode === "EBUSY" || errorCode === "EPERM" || errorCode === "ENOTEMPTY"; if (isLockError && i < retries - 1) { // Wait with exponential backoff: 500, 1000, 2000, 4000... - const delay = 500 * Math.pow(2, i); + const delay = 500 * 2 ** i; await new Promise((resolve) => setTimeout(resolve, delay)); continue; } // On final failure for lock errors, log warning and swallow. // For non-lock errors (any time), throw immediately. + const errorMessage = err instanceof Error ? err.message : String(err); if (isLockError && i === retries - 1) { logWarn( - `Failed to clean up ${path} after ${retries} attempts: ${err.message}. This may be due to a file lock. Proceeding anyway.`, + `Failed to clean up ${path} after ${retries} attempts: ${errorMessage}. This may be due to a file lock. Proceeding anyway.`, ); } else { throw err; @@ -252,7 +258,6 @@ export function verifySandboxIsolation(sandboxDir: string, symlinkDirs: string[] const stat = lstatSync(sandboxPath); if (stat.isSymbolicLink()) { // Good - it's a symlink - continue; } } catch { // Error checking - assume not isolated diff --git a/cli/src/index.ts b/cli/src/index.ts index 5ac9431f..107170b1 100644 --- a/cli/src/index.ts +++ b/cli/src/index.ts @@ -4,7 +4,6 @@ import { addRule, showConfig } from "./cli/commands/config.ts"; import { runInit } from "./cli/commands/init.ts"; import { runLoop } from "./cli/commands/run.ts"; import { runTask } from "./cli/commands/task.ts"; -import { flushAllProgressWrites } from "./config/writer.ts"; import { logError } from "./ui/logger.ts"; async function main(): Promise { @@ -46,9 +45,6 @@ async function main(): Promise { } catch (error) { logError(error instanceof Error ? error.message : String(error)); process.exitCode = 1; - } finally { - // Ensure all progress writes are flushed before exit - await flushAllProgressWrites(); } } diff --git a/cli/src/telemetry/collector.ts b/cli/src/telemetry/collector.ts index 37b38fe9..11dd4b16 100644 --- a/cli/src/telemetry/collector.ts +++ b/cli/src/telemetry/collector.ts @@ -6,6 +6,7 @@ */ import { randomUUID } from "node:crypto"; +import { sanitizeSecrets } from "../utils/sanitization.ts"; import type { Session, SessionFull, @@ -18,6 +19,26 @@ import type { // Package version (loaded lazily) let cachedVersion: string | undefined; +function sanitizeTelemetryValue(value: unknown): unknown { + if (typeof value === "string") { + return sanitizeSecrets(value); + } + + if (Array.isArray(value)) { + return value.map((item) => sanitizeTelemetryValue(item)); + } + + if (value && typeof value === "object") { + const sanitized: Record = {}; + for (const [key, nested] of Object.entries(value)) { + sanitized[key] = sanitizeTelemetryValue(nested); + } + return sanitized; + } + + return value; +} + function getCliVersion(): string { if (cachedVersion) return cachedVersion; try { @@ -116,8 +137,8 @@ export class TelemetryCollector { // Store prompts/responses for full mode if (this.level === "full") { - if (prompt) this.prompts.push(prompt); - if (response) this.responses.push(response); + if (prompt) this.prompts.push(sanitizeSecrets(prompt)); + if (response) this.responses.push(sanitizeSecrets(response)); } } @@ -131,7 +152,10 @@ export class TelemetryCollector { startTime: Date.now(), toolName, parameterKeys: parameters ? Object.keys(parameters) : undefined, - parameters: this.level === "full" ? parameters : undefined, + parameters: + this.level === "full" + ? (sanitizeTelemetryValue(parameters) as Record | undefined) + : undefined, }; // Track file paths in full mode @@ -164,7 +188,7 @@ export class TelemetryCollector { // Add full mode data if (this.level === "full") { toolCall.parameters = this.activeToolCall.parameters; - if (result) toolCall.result = result; + if (result) toolCall.result = sanitizeSecrets(result); } this.toolCalls.push(toolCall); @@ -199,8 +223,10 @@ export class TelemetryCollector { }; if (this.level === "full") { - toolCall.parameters = options?.parameters; - toolCall.result = options?.result; + toolCall.parameters = sanitizeTelemetryValue(options?.parameters) as + | Record + | undefined; + toolCall.result = options?.result ? sanitizeSecrets(options.result) : undefined; // Track file paths if (options?.parameters) { @@ -303,7 +329,7 @@ export class TelemetryCollector { fullSession.response = this.responses.join("\n\n---\n\n"); } if (this.filePaths.size > 0) { - fullSession.filePaths = Array.from(this.filePaths); + fullSession.filePaths = Array.from(this.filePaths).map((path) => sanitizeSecrets(path)); } return { session: fullSession, toolCalls: this.toolCalls }; } diff --git a/cli/src/telemetry/exporter.ts b/cli/src/telemetry/exporter.ts index c4b4a2be..bef8331c 100644 --- a/cli/src/telemetry/exporter.ts +++ b/cli/src/telemetry/exporter.ts @@ -161,7 +161,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "openai-evals.jsonl"); - await writeFile(filePath, entries.join("\n") + "\n", "utf-8"); + await writeFile(filePath, `${entries.join("\n")}\n`, "utf-8"); return filePath; } @@ -194,7 +194,7 @@ export class TelemetryExporter { await this.ensureExportsDir(); const filePath = outputPath || join(this.exportsDir, "raw-telemetry.jsonl"); - const lines = entries.map((e) => JSON.stringify(e)).join("\n") + "\n"; + const lines = `${entries.map((e) => JSON.stringify(e)).join("\n")}\n`; await writeFile(filePath, lines, "utf-8"); return filePath; diff --git a/cli/src/telemetry/types.ts b/cli/src/telemetry/types.ts index 41650f3c..11424fa8 100644 --- a/cli/src/telemetry/types.ts +++ b/cli/src/telemetry/types.ts @@ -78,6 +78,51 @@ export interface ToolCall { */ export type TelemetryLevel = "anonymous" | "full"; +/** + * Full session data for webhook + */ +export interface WebhookSessionData { + sessionId: string; + engine: string; + mode: string; + cliVersion: string; + platform: string; + totalTokensIn: number; + totalTokensOut: number; + totalDurationMs: number; + taskCount: number; + successCount: number; + failedCount: number; + toolCalls: { + toolName: string; + callCount: number; + successCount: number; + failedCount: number; + avgDurationMs: number; + }[]; + tags?: string[]; +} + +/** + * Full session details for webhook (full privacy mode) + */ +export interface WebhookSessionDetails { + prompt?: string; + response?: string; + filePaths?: string[]; +} + +/** + * Telemetry webhook payload + */ +export interface TelemetryWebhookPayload { + event: string; + version: string; + timestamp: string; + session: WebhookSessionData; + details?: WebhookSessionDetails; +} + /** * Telemetry configuration */ diff --git a/cli/src/telemetry/webhook.ts b/cli/src/telemetry/webhook.ts index c305e0e0..6ce965d3 100644 --- a/cli/src/telemetry/webhook.ts +++ b/cli/src/telemetry/webhook.ts @@ -76,11 +76,18 @@ export async function sendTelemetryWebhook( } const payload = buildPayload(session, level); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout + const safeWebhookTarget = (() => { + try { + const parsed = new URL(webhookUrl); + return `${parsed.protocol}//${parsed.host}`; + } catch { + return "[invalid-webhook-url]"; + } + })(); try { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout - const response = await fetch(webhookUrl, { method: "POST", headers: { @@ -90,14 +97,12 @@ export async function sendTelemetryWebhook( signal: controller.signal, }); - clearTimeout(timeoutId); - if (!response.ok) { const text = await response.text().catch(() => ""); throw new Error(`HTTP ${response.status}${text ? `: ${text}` : ""}`); } - logDebug(`Telemetry webhook sent successfully to ${webhookUrl}`); + logDebug(`Telemetry webhook sent successfully to ${safeWebhookTarget}`); } catch (error) { if (error instanceof Error && error.name === "AbortError") { logError("Telemetry webhook timed out after 10 seconds"); @@ -107,5 +112,7 @@ export async function sendTelemetryWebhook( ); } // Don't throw - webhook failures shouldn't break the session + } finally { + clearTimeout(timeoutId); } } diff --git a/cli/src/telemetry/writer.ts b/cli/src/telemetry/writer.ts index ab64c48f..00722c21 100644 --- a/cli/src/telemetry/writer.ts +++ b/cli/src/telemetry/writer.ts @@ -7,6 +7,7 @@ import { existsSync } from "node:fs"; import { appendFile, mkdir, readFile, readdir } from "node:fs/promises"; import { dirname, join } from "node:path"; +import { logDebug } from "../ui/logger.ts"; import type { Session, SessionFull, ToolCall } from "./types.js"; const DEFAULT_OUTPUT_DIR = ".ralphy/telemetry"; @@ -56,7 +57,7 @@ export class TelemetryWriter { async writeSession(session: Session | SessionFull): Promise { await this.ensureDir(); const path = join(this.outputDir, SESSIONS_FILE); - const line = JSON.stringify(session) + "\n"; + const line = `${JSON.stringify(session)}\n`; await appendFile(path, line, "utf-8"); } @@ -68,7 +69,7 @@ export class TelemetryWriter { await this.ensureDir(); const path = join(this.outputDir, TOOL_CALLS_FILE); - const lines = toolCalls.map((call) => JSON.stringify(call)).join("\n") + "\n"; + const lines = `${toolCalls.map((call) => JSON.stringify(call)).join("\n")}\n`; await appendFile(path, lines, "utf-8"); } @@ -92,7 +93,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); - return lines.map((line) => JSON.parse(line) as Session | SessionFull); + const sessions: Array = []; + for (const line of lines) { + try { + sessions.push(JSON.parse(line) as Session | SessionFull); + } catch (error) { + logDebug(`Failed to parse telemetry session line: ${error}`); + } + } + + return sessions; } /** @@ -108,7 +118,16 @@ export class TelemetryWriter { const content = await readFile(path, "utf-8"); const lines = content.trim().split("\n").filter(Boolean); - return lines.map((line) => JSON.parse(line) as ToolCall); + const toolCalls: ToolCall[] = []; + for (const line of lines) { + try { + toolCalls.push(JSON.parse(line) as ToolCall); + } catch (error) { + logDebug(`Failed to parse telemetry tool-call line: ${error}`); + } + } + + return toolCalls; } /** diff --git a/cli/src/ui/logger.ts b/cli/src/ui/logger.ts index 324e719d..116f0ef2 100644 --- a/cli/src/ui/logger.ts +++ b/cli/src/ui/logger.ts @@ -1,49 +1,421 @@ +import { appendFileSync } from "node:fs"; +import path from "node:path"; import pc from "picocolors"; +import { sanitizeSecrets } from "../utils/sanitization.ts"; -let verboseMode = false; +// Use a module-level object for state to avoid direct mutable exports +const loggerState = { + verboseMode: false, + debugMode: false, + explicitVerbose: false, +}; + +// Allowed log directory - logs can only be written here +const ALLOWED_LOG_DIR = "logs"; + +/** + * Validate log file path to prevent path traversal attacks + */ +function validateLogPath(filePath: string): string { + const resolved = path.resolve(filePath); + const allowedDir = path.resolve(process.cwd(), ALLOWED_LOG_DIR); + const relative = path.relative(allowedDir, resolved); + + if (relative.startsWith("..") || path.isAbsolute(relative)) { + throw new Error(`Invalid log file path: ${filePath} must be within ${ALLOWED_LOG_DIR}`); + } + + return resolved; +} + +/** + * Get current verbose mode state + */ +export function isVerbose(): boolean { + return loggerState.verboseMode; +} + +/** + * Get current debug mode state + */ +export function isDebug(): boolean { + return loggerState.debugMode; +} /** * Set verbose mode */ export function setVerbose(verbose: boolean): void { - verboseMode = verbose; + loggerState.verboseMode = verbose; + loggerState.explicitVerbose = true; + verboseMode = loggerState.verboseMode; +} + +/** + * Set debug mode (implies verbose) + */ +export function setDebug(debug: boolean): void { + loggerState.debugMode = debug; + if (debug) { + loggerState.verboseMode = true; + } else if (!loggerState.explicitVerbose) { + loggerState.verboseMode = false; + } + verboseMode = loggerState.verboseMode; +} + +// BUG FIX: Export a getter function instead of a stale primitive value +// This ensures consumers always get the current state, not the initial value +export function getVerboseMode(): boolean { + return loggerState.verboseMode; +} + +// Keep backward compatibility export but mark as deprecated +/** @deprecated Use getVerboseMode() instead for live value */ +export let verboseMode = loggerState.verboseMode; + +/** + * Log levels for structured logging + */ +export type LogLevel = "debug" | "info" | "success" | "warn" | "error"; + +/** + * Structured log entry interface + */ +export interface LogEntry { + timestamp: string; + level: LogLevel; + component: string; + message: string; + context?: Record; +} + +/** + * Log sink interface for extensible logging + */ +export interface LogSink { + write(entry: LogEntry): void; +} + +/** + * Default console log sink with colors + */ +class ConsoleLogSink implements LogSink { + write(entry: LogEntry): void { + // Defensive: validate entry has required fields + if (!entry || typeof entry !== "object") { + console.error("[Logger] Invalid log entry"); + return; + } + const timestamp = entry.timestamp ?? new Date().toISOString(); + const level = entry.level ?? "info"; + const component = entry.component ?? "ralphy"; + const message = entry.message ?? ""; + const prefix = `[${timestamp}] [${level.toUpperCase()}]`; + + switch (level) { + case "error": + console.error(pc.red(`${prefix} ${component ? `[${component}] ` : ""}${message}`)); + break; + case "warn": + console.warn(pc.yellow(`${prefix} ${component ? `[${component}] ` : ""}${message}`)); + break; + case "success": + console.log(pc.green(`[OK] ${message}`)); + break; + case "info": + console.log(pc.blue(`${prefix} ${component ? `[${component}] ` : ""}${message}`)); + break; + case "debug": + console.log(pc.gray(`${prefix} ${component ? `[${component}] ` : ""}${message}`)); + break; + default: + console.log(`${prefix} ${component ? `[${component}] ` : ""}${message}`); + } + } +} + +// Global log sink instance +let logSink: LogSink = new ConsoleLogSink(); + +/** + * Set a custom log sink for extensible logging + */ +export function setLogSink(sink: LogSink): void { + logSink = sink; +} + +/** + * Get current log sink + */ +export function getLogSink(): LogSink { + return logSink; +} + +/** + * Internal function to create log entry + * Sanitizes secrets from logged data + */ +function createLogEntry(level: LogLevel, component: string | undefined, args: unknown[]): LogEntry { + const safeSerialize = (arg: unknown): string => { + if (typeof arg === "string") { + return arg; + } + + try { + return JSON.stringify(arg); + } catch { + return String(arg); + } + }; + + const rawMessage = args.map((arg) => safeSerialize(arg)).join(" "); + // Sanitize secrets from the message + const message = sanitizeSecrets(rawMessage); + + return { + timestamp: new Date().toISOString(), + level, + component: component || "ralphy", + message, + }; +} + +/** + * Core logging function + */ +function log(level: LogLevel, component: string | undefined, ...args: unknown[]): void { + // Debug messages only show in verbose or debug mode + if (level === "debug" && !loggerState.verboseMode) { + return; + } + + const entry = createLogEntry(level, component, args); + logSink.write(entry); } /** * Log info message */ export function logInfo(...args: unknown[]): void { - console.log(pc.blue("[INFO]"), ...args); + log("info", undefined, ...args); +} + +/** + * Log info message with component context + */ +export function logInfoContext(component: string, ...args: unknown[]): void { + log("info", component, ...args); } /** * Log success message */ export function logSuccess(...args: unknown[]): void { - console.log(pc.green("[OK]"), ...args); + log("success", undefined, ...args); +} + +/** + * Log success message with component context + */ +export function logSuccessContext(component: string, ...args: unknown[]): void { + log("success", component, ...args); } /** * Log warning message */ export function logWarn(...args: unknown[]): void { - console.log(pc.yellow("[WARN]"), ...args); + log("warn", undefined, ...args); +} + +/** + * Log warning message with component context + */ +export function logWarnContext(component: string, ...args: unknown[]): void { + log("warn", component, ...args); } /** * Log error message */ export function logError(...args: unknown[]): void { - console.error(pc.red("[ERROR]"), ...args); + log("error", undefined, ...args); +} + +/** + * Log error message with component context + */ +export function logErrorContext(component: string, ...args: unknown[]): void { + log("error", component, ...args); } /** * Log debug message (only in verbose mode) */ export function logDebug(...args: unknown[]): void { - if (verboseMode) { - console.log(pc.dim("[DEBUG]"), ...args); + log("debug", undefined, ...args); +} + +/** + * Log debug message with component context + */ +export function logDebugContext(component: string, ...args: unknown[]): void { + log("debug", component, ...args); +} + +/** + * JSON file log sink for structured logging to file + */ +export class JsonFileLogSink implements LogSink { + private filePath: string; + private buffer: LogEntry[] = []; + private flushInterval: number; + private maxBufferSize: number; + // BUG FIX: Use proper nullable type instead of type cast hack + private flushTimer: NodeJS.Timeout | null = null; + + constructor(filePath: string, options?: { flushIntervalMs?: number; maxBufferSize?: number }) { + // Validate path to prevent path traversal attacks + this.filePath = validateLogPath(filePath); + this.flushInterval = options?.flushIntervalMs ?? 1000; + this.maxBufferSize = options?.maxBufferSize ?? 100; + + // Auto-flush buffer periodically + this.flushTimer = setInterval(() => { + try { + this.flush(); + } catch (err) { + console.error(`Failed to flush log buffer: ${err}`); + } + }, this.flushInterval); + } + + private isFlushing = false; + + write(entry: LogEntry): void { + this.buffer.push(entry); + + if (this.buffer.length >= this.maxBufferSize) { + this.flush(); + } } + + private flush(): void { + // Prevent concurrent flushes (race condition fix) + if (this.isFlushing || this.buffer.length === 0) return; + + this.isFlushing = true; + let currentBuffer: LogEntry[] = []; + + try { + // ATOMIC: Swap buffers instead of copy-then-clear + // This prevents race conditions where write() is called between copy and clear + currentBuffer = this.buffer; + this.buffer = []; // New empty buffer assigned atomically + const lines = `${currentBuffer.map((entry) => JSON.stringify(entry)).join("\n")}\n`; + appendFileSync(this.filePath, lines, "utf-8"); + } catch (error) { + console.error(`Failed to write to log file: ${error}`); + // Limit buffer size to prevent memory exhaustion on persistent write failures + const MAX_BUFFER_SIZE = 10000; + const combined = [...currentBuffer, ...this.buffer]; + // Keep only the most recent entries, discard oldest to prevent memory leak + if (combined.length > MAX_BUFFER_SIZE) { + this.buffer = combined.slice(-MAX_BUFFER_SIZE); + console.warn(`Log buffer truncated to ${MAX_BUFFER_SIZE} entries due to write failure`); + } else { + this.buffer = combined; + } + } finally { + this.isFlushing = false; + } + } + + /** + * Dispose of the file log sink, stopping the flush timer. + * Call this when done logging to prevent memory leaks. + */ + dispose(): void { + // BUG FIX: Proper nullable type check without type cast hack + if (this.flushTimer !== null) { + clearInterval(this.flushTimer); + this.flushTimer = null; + } + // Final flush to ensure all logs are written + this.flush(); + } +} + +/** + * Multi-sink that writes to multiple log sinks + */ +export class MultiLogSink implements LogSink { + private sinks: LogSink[]; + + constructor(sinks: LogSink[]) { + this.sinks = sinks; + } + + write(entry: LogEntry): void { + for (const sink of this.sinks) { + try { + sink.write(entry); + } catch (error) { + console.error(`Log sink failed: ${error}`); + } + } + } + + addSink(sink: LogSink): void { + this.sinks.push(sink); + } +} + +/** + * Filtered log sink that only passes certain log levels + */ +export class FilteredLogSink implements LogSink { + private sink: LogSink; + private minLevel: LogLevel; + private levelPriority: Record = { + debug: 0, + info: 1, + success: 2, + warn: 3, + error: 4, + }; + + constructor(sink: LogSink, minLevel: LogLevel) { + this.sink = sink; + this.minLevel = minLevel; + } + + write(entry: LogEntry): void { + if (this.levelPriority[entry.level] >= this.levelPriority[this.minLevel]) { + this.sink.write(entry); + } + } +} + +/** + * Initialize structured logging with file output + * @param logFilePath - Path to JSON log file (optional) + * @param minLevel - Minimum log level to record (default: "info") + */ +export function initializeStructuredLogging( + logFilePath?: string, + minLevel: LogLevel = "info", +): void { + const sinks: LogSink[] = [new ConsoleLogSink()]; + + if (logFilePath) { + const fileSink = new JsonFileLogSink(logFilePath); + const filteredFileSink = new FilteredLogSink(fileSink, minLevel); + sinks.push(filteredFileSink); + } + + setLogSink(new MultiLogSink(sinks)); } /** diff --git a/cli/src/utils/cleanup.ts b/cli/src/utils/cleanup.ts new file mode 100644 index 00000000..09e598f1 --- /dev/null +++ b/cli/src/utils/cleanup.ts @@ -0,0 +1,152 @@ +import type { ChildProcess } from "node:child_process"; +import { spawnSync } from "node:child_process"; +import { logDebug, logWarn } from "../ui/logger.ts"; + +type CleanupFn = () => Promise | void; + +const cleanupRegistry: Set = new Set(); +const trackedProcesses: Set = new Set(); +let isCleaningUp = false; + +function isProcessRunning(proc: ChildProcess): boolean { + return proc.exitCode === null && proc.signalCode === null; +} + +/** + * Register a function to be called on process exit or manual cleanup + */ +export function registerCleanup(fn: CleanupFn): () => void { + cleanupRegistry.add(fn); + return () => cleanupRegistry.delete(fn); +} + +/** + * Register a child process to be tracked and killed on exit + */ +export function registerProcess(proc: ChildProcess): () => void { + trackedProcesses.add(proc); + + const remove = () => trackedProcesses.delete(proc); + + proc.on("exit", remove); + proc.on("error", remove); + + return remove; +} + +/** + * Run all registered cleanup functions and kill tracked processes + */ +export async function runCleanup(): Promise { + if (isCleaningUp) return; + isCleaningUp = true; + + // 1. Kill all tracked child processes with verification + for (const proc of trackedProcesses) { + try { + if (proc.pid && isProcessRunning(proc)) { + const pid = proc.pid; + + if (process.platform === "win32") { + // Windows needs taskkill for robust child tree termination + const result = spawnSync("taskkill", ["/pid", String(pid), "/f", "/t"], { + stdio: "pipe", + }); + + // Verify the process was actually killed + // Status 128 = process already exited, which is fine + if (result.status !== 0 && result.status !== 128) { + logWarn(`taskkill may have failed for PID ${pid} (exit code: ${result.status})`); + if (result.stderr) { + logDebug(`taskkill stderr: ${result.stderr.toString()}`); + } + } + + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Process ${pid} may still be running after taskkill`); + } + } else { + // Try graceful termination first + proc.kill("SIGTERM"); + + // Wait a bit and verify it's dead + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Check if process is still running + if (isProcessRunning(proc)) { + proc.kill("SIGKILL"); + + // Final verification + await new Promise((resolve) => setTimeout(resolve, 500)); + if (isProcessRunning(proc)) { + logWarn(`Failed to terminate process ${pid} after SIGKILL`); + } + } + } + } + } catch (err) { + // Process termination failed, continue cleanup + logDebug(`Failed to terminate process ${proc.pid}: ${err}`); + } + } + trackedProcesses.clear(); + + // 2. Run registered cleanup functions + const promises: Promise[] = []; + for (const fn of cleanupRegistry) { + try { + const result = fn(); + if (result instanceof Promise) { + promises.push(result); + } + } catch (err) { + // Log sync errors but continue with other cleanup functions + promises.push(Promise.reject(err)); + } + } + + await Promise.allSettled(promises); + cleanupRegistry.clear(); + isCleaningUp = false; +} + +let isShuttingDown = false; +let signalHandlersRegistered = false; + +/** + * Setup process signal handlers for cleanup + */ +export function setupSignalHandlers(): void { + if (signalHandlersRegistered) { + return; + } + signalHandlersRegistered = true; + + const signals: NodeJS.Signals[] = ["SIGINT", "SIGTERM"]; + + for (const signal of signals) { + process.on(signal, async () => { + // Prevent duplicate cleanup runs + if (isShuttingDown) { + process.stdout.write(`\nReceived ${signal}, cleanup already in progress...\n`); + return; + } + isShuttingDown = true; + + // Use writeSync to avoid event loop issues during exit + process.stdout.write(`\nReceived ${signal}, cleaning up processes and files...\n`); + + try { + await runCleanup(); + process.exit(0); + } catch (error) { + process.stderr.write(`\nCleanup failed: ${error}\n`); + process.exit(1); + } + }); + } + + // Note: uncaughtException is handled in cli/src/index.ts for the main process + // This avoids duplicate handlers and ensures consistent error handling +} diff --git a/cli/src/utils/errors.ts b/cli/src/utils/errors.ts new file mode 100644 index 00000000..d5aa81a6 --- /dev/null +++ b/cli/src/utils/errors.ts @@ -0,0 +1,131 @@ +/** + * Standardized error handling utilities for consistent error types across the codebase + */ + +export class RalphyError extends Error { + public readonly code: string; + public readonly context?: Record; + + constructor(message: string, code = "RALPHY_ERROR", context?: Record) { + super(message); + this.name = "RalphyError"; + this.code = code; + this.context = context; + + // Maintains proper stack trace for where our error was thrown (only available on V8) + if (Error.captureStackTrace) { + Error.captureStackTrace(this, RalphyError); + } + } +} + +export class ValidationError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "VALIDATION_ERROR", context); + this.name = "ValidationError"; + } +} + +export class TimeoutError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "TIMEOUT_ERROR", context); + this.name = "TimeoutError"; + } +} + +export class LockError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "LOCK_ERROR", context); + this.name = "LockError"; + } +} + +export class ProcessError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "PROCESS_ERROR", context); + this.name = "ProcessError"; + } +} + +export class SandboxError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "SANDBOX_ERROR", context); + this.name = "SandboxError"; + } +} + +/** + * Convert any error to a standardized format + */ +export function standardizeError(error: unknown): RalphyError { + if (error instanceof RalphyError) { + return error; + } + + if (error instanceof Error) { + return new RalphyError(error.message, "UNKNOWN_ERROR", { + originalName: error.name, + originalStack: error.stack, + }); + } + + if (typeof error === "string") { + return new RalphyError(error, "STRING_ERROR"); + } + + return new RalphyError(String(error), "UNKNOWN_ERROR", { originalType: typeof error }); +} + +/** + * Check if an error is retryable + */ +export function isRetryableError(error: unknown): boolean { + const standardized = standardizeError(error); + + const retryableCodes = ["TIMEOUT_ERROR", "LOCK_ERROR", "PROCESS_ERROR", "NETWORK_ERROR", "RATE_LIMIT_ERROR"]; + + const retryableMessages = [ + "timeout", + "connection refused", + "network", + "rate limit", + "too many requests", + "temporary failure", + "try again", + "locked", + "conflict", + "connection error", + "unable to connect", + "internet connection", + "econnrefused", + "econnreset", + "socket hang up", + "fetch failed", + ]; + + const message = standardized.message.toLowerCase(); + + // Check error code + if (retryableCodes.includes(standardized.code)) { + return true; + } + + // Check error message + return retryableMessages.some((pattern) => message.includes(pattern)); +} + +/** + * Create error with context for logging + */ +export function createErrorWithContext(error: unknown, context: Record): RalphyError { + const standardized = standardizeError(error); + + if (standardized.context) { + return new RalphyError(standardized.message, standardized.code, { + ...standardized.context, + ...context, + }); + } + + return new RalphyError(standardized.message, standardized.code, context); +} diff --git a/cli/src/utils/hooks.ts b/cli/src/utils/hooks.ts new file mode 100644 index 00000000..e34f60f5 --- /dev/null +++ b/cli/src/utils/hooks.ts @@ -0,0 +1,367 @@ +import { randomBytes } from "node:crypto"; +import type { AIResult } from "../engines/types.ts"; +import type { Task } from "../tasks/types.ts"; +import { logDebugContext, logErrorContext } from "../ui/logger.ts"; + +/** + * Hook system for Ralphy CLI lifecycle events + * + * Provides a plugin-like mechanism for extending functionality + * without modifying core code. Supports: + * - Task lifecycle hooks (start, complete, fail) + * - Execution hooks (before, after) + * - Engine hooks (pre-execute, post-execute) + * - Custom hooks for user-defined extensions + */ + +/** + * Hook context passed to all hook handlers + */ +export interface HookContext { + timestamp: number; + [key: string]: unknown; +} + +/** + * Task start hook context + */ +export interface TaskStartContext extends HookContext { + task: Task; + workDir: string; + engine: string; +} + +/** + * Task complete hook context + */ +export interface TaskCompleteContext extends HookContext { + task: Task; + result: AIResult; + workDir: string; + durationMs: number; +} + +/** + * Task fail hook context + */ +export interface TaskFailContext extends HookContext { + task: Task; + error: Error | string; + workDir: string; + attempt: number; + maxAttempts: number; +} + +/** + * Engine execute hook context + */ +export interface EngineExecuteContext extends HookContext { + engine: string; + prompt: string; + workDir: string; + options?: Record; +} + +/** + * Engine result hook context + */ +export interface EngineResultContext extends HookContext { + engine: string; + result: AIResult; + durationMs: number; +} + +/** + * Hook handler type + */ +export type HookHandler = (context: T) => Promise | void; + +/** + * Available hook names + */ +export type HookName = + | "task:start" + | "task:complete" + | "task:fail" + | "task:skip" + | "engine:pre-execute" + | "engine:post-execute" + | "queue:enqueue" + | "queue:dequeue" + | "config:load" + | "config:save" + | "git:pre-commit" + | "git:post-commit" + | "notification:send"; + +/** + * Hook registry entry + */ +interface HookEntry { + name: string; + handler: HookHandler; + priority: number; +} + +/** + * Hook manager + */ +export class HookManager { + private hooks: Map[]> = new Map(); + private enabled: boolean = true; + + /** + * Register a hook handler + */ + register( + hookName: HookName, + handler: HookHandler, + options?: { priority?: number; name?: string }, + ): () => void { + const entry: HookEntry = { + name: options?.name || `hook-${Date.now()}-${randomBytes(9).toString("base64url").slice(0, 12)}`, + handler: handler as HookHandler, + priority: options?.priority ?? 0, + }; + + const existing = this.hooks.get(hookName) || []; + existing.push(entry); + + // Sort by priority (higher first) + existing.sort((a, b) => b.priority - a.priority); + + this.hooks.set(hookName, existing); + + logDebugContext("Hooks", `Registered hook: ${hookName} (${entry.name})`); + + // Return unregister function + return () => { + const hooks = this.hooks.get(hookName) || []; + const index = hooks.findIndex((h) => h.name === entry.name); + if (index !== -1) { + hooks.splice(index, 1); + logDebugContext("Hooks", `Unregistered hook: ${hookName} (${entry.name})`); + } + }; + } + + /** + * Execute all handlers for a hook + */ + async execute(name: HookName, context: T): Promise { + if (!this.enabled) { + return; + } + + const hooks = this.hooks.get(name) || []; + if (hooks.length === 0) { + return; + } + + logDebugContext("Hooks", `Executing ${hooks.length} hooks for: ${name}`); + + for (const hook of hooks) { + try { + const result = hook.handler(context); + // Ensure we await async handlers + if (result instanceof Promise) { + await result; + } + } catch (error) { + logErrorContext("Hooks", `Hook ${hook.name} failed for ${name}: ${error}`); + // Continue with other hooks even if one fails + } + } + } + + /** + * Check if any hooks are registered for a name + */ + hasHooks(name: HookName): boolean { + const hooks = this.hooks.get(name); + return hooks !== undefined && hooks.length > 0; + } + + /** + * Get number of registered hooks for a name + */ + getHookCount(name: HookName): number { + return this.hooks.get(name)?.length || 0; + } + + /** + * Enable/disable all hooks + */ + setEnabled(enabled: boolean): void { + this.enabled = enabled; + logDebugContext("Hooks", `Hooks ${enabled ? "enabled" : "disabled"}`); + } + + /** + * Clear all hooks + */ + clear(): void { + this.hooks.clear(); + logDebugContext("Hooks", "All hooks cleared"); + } + + /** + * Get all registered hook names + */ + getRegisteredHooks(): HookName[] { + return Array.from(this.hooks.keys()); + } +} + +/** + * Global hook manager instance + */ +let globalHookManager: HookManager = new HookManager(); + +/** + * Set global hook manager + */ +export function setHookManager(manager: HookManager): void { + globalHookManager = manager; +} + +/** + * Get global hook manager + */ +export function getHookManager(): HookManager { + return globalHookManager; +} + +/** + * Register a hook (convenience function) + */ +export function registerHook( + name: HookName, + handler: HookHandler, + options?: { priority?: number; name?: string }, +): () => void { + return globalHookManager.register(name, handler, options); +} + +/** + * Execute hooks (convenience function) + */ +export async function executeHooks(name: HookName, context: T): Promise { + return globalHookManager.execute(name, context); +} + +/** + * Create a plugin interface + */ +export interface RalphyPlugin { + /** + * Plugin name + */ + name: string; + + /** + * Plugin version + */ + version: string; + + /** + * Initialize plugin - called when plugin is registered + */ + initialize?(): Promise | void; + + /** + * Register hooks - called during plugin registration + */ + registerHooks(hookManager: HookManager): void; + + /** + * Shutdown plugin - called when plugin is unregistered + */ + shutdown?(): Promise | void; +} + +/** + * Plugin manager + */ +export class PluginManager { + private plugins: Map = new Map(); + private hookManager: HookManager; + + constructor(hookManager: HookManager) { + this.hookManager = hookManager; + } + + /** + * Register a plugin + */ + async register(plugin: RalphyPlugin): Promise { + if (this.plugins.has(plugin.name)) { + throw new Error(`Plugin ${plugin.name} is already registered`); + } + + // Initialize plugin with proper error handling + if (plugin.initialize) { + try { + await plugin.initialize(); + } catch (error) { + logErrorContext("Plugins", `Plugin ${plugin.name} initialization failed: ${error}`); + throw error; // Re-throw to prevent registration of failed plugin + } + } + + // Register hooks with error handling + try { + plugin.registerHooks(this.hookManager); + } catch (error) { + logErrorContext("Plugins", `Plugin ${plugin.name} hook registration failed: ${error}`); + throw error; + } + + // Store plugin + this.plugins.set(plugin.name, plugin); + + logDebugContext("Plugins", `Registered plugin: ${plugin.name} v${plugin.version}`); + } + + /** + * Unregister a plugin + */ + async unregister(pluginName: string): Promise { + const plugin = this.plugins.get(pluginName); + if (!plugin) { + throw new Error(`Plugin ${pluginName} is not registered`); + } + + // Shutdown plugin + if (plugin.shutdown) { + await plugin.shutdown(); + } + + this.plugins.delete(pluginName); + + logDebugContext("Plugins", `Unregistered plugin: ${pluginName}`); + } + + /** + * Get registered plugin + */ + getPlugin(name: string): RalphyPlugin | undefined { + return this.plugins.get(name); + } + + /** + * Get all registered plugins + */ + getAllPlugins(): RalphyPlugin[] { + return Array.from(this.plugins.values()); + } + + /** + * Check if plugin is registered + */ + hasPlugin(name: string): boolean { + return this.plugins.has(name); + } +} + +// HookManager is already exported above diff --git a/cli/src/utils/metrics.ts b/cli/src/utils/metrics.ts new file mode 100644 index 00000000..0318dc35 --- /dev/null +++ b/cli/src/utils/metrics.ts @@ -0,0 +1,449 @@ +/** + * Metrics and Tracing System for Ralphy CLI + * + * Provides observability with: + * - Metrics collection (counters, gauges, histograms) + * - Distributed tracing (OpenTelemetry-style spans) + * - Multiple exporters (console, file, Prometheus) + */ + +import { randomBytes } from "node:crypto"; +import { logDebugContext } from "../ui/logger.ts"; + +/** + * Metric types + */ +export type MetricType = "counter" | "gauge" | "histogram"; + +/** + * Metric value + */ +export type MetricValue = number | Record | HistogramValue; + +/** + * Metric labels/tags + */ +export interface MetricLabels { + [key: string]: string | number | boolean; +} + +/** + * Metric definition + */ +export interface Metric { + name: string; + type: MetricType; + description: string; + unit?: string; + labels?: MetricLabels; + value: MetricValue; + timestamp: number; +} + +/** + * Histogram bucket + */ +export interface HistogramBucket { + le: number; // less than or equal + count: number; +} + +/** + * Histogram value + */ +export interface HistogramValue { + count: number; + sum: number; + buckets: HistogramBucket[]; +} + +/** + * Span context for distributed tracing + */ +export interface SpanContext { + traceId: string; + spanId: string; + parentSpanId?: string; + sampled: boolean; +} + +/** + * Span status + */ +export type SpanStatus = "unset" | "ok" | "error"; + +/** + * Span event + */ +export interface SpanEvent { + timestamp: number; + name: string; + attributes?: Record; +} + +/** + * Span representation + */ +export interface Span { + context: SpanContext; + name: string; + startTime: number; + endTime?: number; + status: SpanStatus; + attributes: Record; + events: SpanEvent[]; + links?: SpanContext[]; +} + +/** + * Metrics collector interface + */ +export interface MetricsCollector { + /** + * Increment a counter metric + */ + counter(name: string, value?: number, labels?: MetricLabels): void; + + /** + * Set a gauge metric value + */ + gauge(name: string, value: number, labels?: MetricLabels): void; + + /** + * Record a histogram observation + */ + histogram(name: string, value: number, labels?: MetricLabels): void; + + /** + * Get all collected metrics + */ + getMetrics(): Metric[]; + + /** + * Clear all metrics + */ + clear(): void; +} + +/** + * Tracer interface + */ +export interface Tracer { + /** + * Start a new span + */ + startSpan(name: string, parentContext?: SpanContext, attributes?: Record): Span; + + /** + * End a span + */ + endSpan(span: Span, status?: SpanStatus): void; + + /** + * Add event to span + */ + addEvent(span: Span, name: string, attributes?: Record): void; + + /** + * Get active span + */ + getActiveSpan(): Span | null; + + /** + * Set active span + */ + setActiveSpan(span: Span | null): void; +} + +/** + * Exporter interface for metrics and traces + */ +export interface Exporter { + /** + * Export metrics + */ + exportMetrics(metrics: Metric[]): Promise; + + /** + * Export spans + */ + exportSpans(spans: Span[]): Promise; + + /** + * Shutdown exporter + */ + shutdown(): Promise; +} + +/** + * In-memory metrics collector implementation + */ +class InMemoryMetricsCollector implements MetricsCollector { + private metrics: Map = new Map(); + private histogramBuckets: Map = new Map(); + private static readonly MAX_HISTOGRAM_SAMPLES = 10000; + + counter(name: string, value = 1, labels?: MetricLabels): void { + const key = this.getMetricKey(name, labels); + const existing = this.metrics.get(key); + + if (existing && existing.type === "counter") { + existing.value = (existing.value as number) + value; + } else { + this.metrics.set(key, { + name, + type: "counter", + description: `Counter metric: ${name}`, + labels, + value, + timestamp: Date.now(), + }); + } + + logDebugContext("Metrics", `Counter ${name}: +${value}`); + } + + gauge(name: string, value: number, labels?: MetricLabels): void { + const key = this.getMetricKey(name, labels); + + this.metrics.set(key, { + name, + type: "gauge", + description: `Gauge metric: ${name}`, + labels, + value, + timestamp: Date.now(), + }); + + logDebugContext("Metrics", `Gauge ${name}: ${value}`); + } + + histogram(name: string, value: number, labels?: MetricLabels): void { + const key = this.getMetricKey(name, labels); + const bucketKey = `${key}_buckets`; + + // Store raw values for histogram calculation (capped) + const buckets = this.histogramBuckets.get(bucketKey) || []; + buckets.push(value); + if (buckets.length > InMemoryMetricsCollector.MAX_HISTOGRAM_SAMPLES) { + buckets.shift(); + } + this.histogramBuckets.set(bucketKey, buckets); + + // Calculate histogram stats + const sum = buckets.reduce((a, b) => a + b, 0); + const sorted = [...buckets].sort((a, b) => a - b); + + // Create default buckets: 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10 + const bucketBoundaries = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]; + const histogramBuckets: HistogramBucket[] = bucketBoundaries.map((le) => ({ + le, + count: sorted.filter((v) => v <= le).length, + })); + // Add +Inf bucket + histogramBuckets.push({ le: Infinity, count: buckets.length }); + + const histogramValue: HistogramValue = { + count: buckets.length, + sum, + buckets: histogramBuckets, + }; + + this.metrics.set(key, { + name, + type: "histogram", + description: `Histogram metric: ${name}`, + labels, + value: histogramValue, + timestamp: Date.now(), + }); + + logDebugContext("Metrics", `Histogram ${name}: ${value}`); + } + + getMetrics(): Metric[] { + return Array.from(this.metrics.values()); + } + + clear(): void { + this.metrics.clear(); + this.histogramBuckets.clear(); + } + + private getMetricKey(name: string, labels?: MetricLabels): string { + if (!labels || Object.keys(labels).length === 0) { + return name; + } + const labelStr = Object.entries(labels) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([k, v]) => `${k}=${v}`) + .join(","); + return `${name}{${labelStr}}`; + } +} + +/** + * Simple tracer implementation + */ +class SimpleTracer implements Tracer { + private spans: Span[] = []; + private activeSpan: Span | null = null; + private idCounter = 0; + + startSpan(name: string, parentContext?: SpanContext, attributes?: Record): Span { + const spanId = this.generateId(); + const traceId = parentContext?.traceId || this.generateId(); + + const span: Span = { + context: { + traceId, + spanId, + parentSpanId: parentContext?.spanId, + sampled: parentContext?.sampled ?? true, + }, + name, + startTime: Date.now(), + status: "unset", + attributes: attributes || {}, + events: [], + }; + + this.spans.push(span); + logDebugContext("Tracer", `Started span: ${name} (${spanId})`); + + return span; + } + + endSpan(span: Span, status: SpanStatus = "ok"): void { + span.endTime = Date.now(); + span.status = status; + logDebugContext("Tracer", `Ended span: ${span.name} (${span.context.spanId}) - ${status}`); + } + + addEvent(span: Span, name: string, attributes?: Record): void { + span.events.push({ + timestamp: Date.now(), + name, + attributes, + }); + logDebugContext("Tracer", `Event in ${span.name}: ${name}`); + } + + getActiveSpan(): Span | null { + return this.activeSpan; + } + + setActiveSpan(span: Span | null): void { + this.activeSpan = span; + } + + getSpans(): Span[] { + return this.spans; + } + + private generateId(): string { + return `${Date.now().toString(36)}-${(++this.idCounter).toString(36)}-${randomBytes(9).toString("base64url").slice(0, 12)}`; + } +} + +/** + * Global metrics collector instance + */ +let globalMetricsCollector: MetricsCollector = new InMemoryMetricsCollector(); + +/** + * Global tracer instance + */ +let globalTracer: Tracer = new SimpleTracer(); + +/** + * Set global metrics collector + */ +export function setMetricsCollector(collector: MetricsCollector): void { + globalMetricsCollector = collector; +} + +/** + * Get global metrics collector + */ +export function getMetricsCollector(): MetricsCollector { + return globalMetricsCollector; +} + +/** + * Set global tracer + */ +export function setTracer(tracer: Tracer): void { + globalTracer = tracer; +} + +/** + * Get global tracer + */ +export function getTracer(): Tracer { + return globalTracer; +} + +/** + * Record a counter metric (convenience function) + */ +export function recordCounter(name: string, value = 1, labels?: MetricLabels): void { + globalMetricsCollector.counter(name, value, labels); +} + +/** + * Record a gauge metric (convenience function) + */ +export function recordGauge(name: string, value: number, labels?: MetricLabels): void { + globalMetricsCollector.gauge(name, value, labels); +} + +/** + * Record a histogram observation (convenience function) + */ +export function recordHistogram(name: string, value: number, labels?: MetricLabels): void { + globalMetricsCollector.histogram(name, value, labels); +} + +/** + * Start a span (convenience function) + */ +export function startSpan(name: string, parentContext?: SpanContext, attributes?: Record): Span { + return globalTracer.startSpan(name, parentContext, attributes); +} + +/** + * End a span (convenience function) + */ +export function endSpan(span: Span, status?: SpanStatus): void { + globalTracer.endSpan(span, status); +} + +/** + * Run function within a span + */ +export async function withSpan( + name: string, + fn: (span: Span) => Promise, + parentContext?: SpanContext, + attributes?: Record, +): Promise { + const span = startSpan(name, parentContext, attributes); + const prevActive = globalTracer.getActiveSpan(); + globalTracer.setActiveSpan(span); + + try { + const result = await fn(span); + endSpan(span, "ok"); + return result; + } catch (error) { + endSpan(span, "error"); + span.attributes.error = error instanceof Error ? error.message : String(error); + throw error; + } finally { + globalTracer.setActiveSpan(prevActive); + } +} + +// Re-export types +export { InMemoryMetricsCollector, SimpleTracer }; diff --git a/cli/src/utils/opencode-parser.ts b/cli/src/utils/opencode-parser.ts new file mode 100644 index 00000000..5adc67a4 --- /dev/null +++ b/cli/src/utils/opencode-parser.ts @@ -0,0 +1,800 @@ +/** + * OpenCode JSON Stream Parser + * + * Parses OpenCode's JSON output format and provides: + * - Event type detection and classification + * - Human-readable descriptions for UI display + * - Filtering capabilities by event type, tool, status, etc. + * - Session reconstruction from log files + */ + +import { z } from "zod"; + +// ============================================================================= +// Zod Schemas for OpenCode Events +// ============================================================================= + +export const ToolStateSchema = z.object({ + status: z.string(), + input: z.any().optional(), + output: z.any().optional(), + title: z.string().optional(), + metadata: z.record(z.any()).optional(), + time: z + .object({ + start: z.number(), + end: z.number(), + }) + .optional(), +}); + +export const ToolUsePartSchema = z.object({ + id: z.string(), + sessionID: z.string(), + messageID: z.string(), + type: z.literal("tool"), + callID: z.string(), + tool: z.string(), + state: ToolStateSchema, + metadata: z.record(z.any()).optional(), +}); + +export const ToolUseEventSchema = z.object({ + type: z.literal("tool_use"), + timestamp: z.number(), + sessionID: z.string(), + part: ToolUsePartSchema, +}); + +export const StepStartPartSchema = z.object({ + id: z.string(), + sessionID: z.string(), + messageID: z.string(), + type: z.literal("step-start"), + snapshot: z.string(), +}); + +export const StepStartEventSchema = z.object({ + type: z.literal("step_start"), + timestamp: z.number(), + sessionID: z.string(), + part: StepStartPartSchema, +}); + +export const StepFinishPartSchema = z.object({ + id: z.string(), + sessionID: z.string(), + messageID: z.string(), + type: z.literal("step-finish"), + reason: z.string(), + snapshot: z.string(), + cost: z.number().optional(), + tokens: z + .object({ + input: z.number(), + output: z.number(), + reasoning: z.number().optional(), + cache: z + .object({ + read: z.number(), + write: z.number(), + }) + .optional(), + }) + .optional(), +}); + +export const StepFinishEventSchema = z.object({ + type: z.literal("step_finish"), + timestamp: z.number(), + sessionID: z.string(), + part: StepFinishPartSchema, +}); + +export const TextPartSchema = z.object({ + text: z.string(), +}); + +export const TextEventSchema = z.object({ + type: z.literal("text"), + sessionID: z.string().optional(), + sessionId: z.string().optional(), + session_id: z.string().optional(), + part: TextPartSchema, +}); + +export const ErrorEventSchema = z.object({ + type: z.literal("error"), + sessionID: z.string().optional(), + sessionId: z.string().optional(), + session_id: z.string().optional(), + error: z + .object({ + message: z.string(), + }) + .optional(), + message: z.string().optional(), +}); + +export const OpenCodeEventSchema = z.union([ + ToolUseEventSchema, + StepStartEventSchema, + StepFinishEventSchema, + TextEventSchema, + ErrorEventSchema, +]); + +// Type exports from schemas +export type ToolState = z.infer; +export type ToolUsePart = z.infer; +export type ToolUseEvent = z.infer; +export type StepStartEvent = z.infer; +export type StepFinishEvent = z.infer; +export type TextEvent = z.infer; +export type ErrorEvent = z.infer; +export type OpenCodeEvent = z.infer; + +// ============================================================================= +// Extended Type Definitions +// ============================================================================= + +export type EventType = + | "tool_use" + | "step_start" + | "step_finish" + | "text" + | "error" + | "plan" + | "thinking" + | "unknown"; + +// Tool types supported by OpenCode/Kimi +export type ToolType = + | "read" + | "write" + | "edit" + | "glob" + | "grep" + | "bash" + | "list" + | "search" + | "analyze" + | string; + +// Tool status types +export type ToolStatus = "completed" | "failed" | "in_progress" | "pending"; + +// File operation metadata +export interface FileOperation { + filePath?: string; + file_path?: string; + path?: string; + pattern?: string; + query?: string; + command?: string; + content?: string; + old_string?: string; + new_string?: string; +} + +// Extended tool state interface for runtime use (compatible with Zod schema) +export interface ExtendedToolState extends ToolState { + metadata?: { + count?: number; + truncated?: boolean; + preview?: string; + lines?: number; + bytes?: number; + }; +} + +// Enhanced tool use event for runtime processing +export interface EnhancedToolUseEvent extends ToolUseEvent { + part: ToolUsePart & { + metadata?: Record; + }; +} + +export interface ParsedEvent { + /** Original raw line */ + raw: string; + /** Parsed event data */ + event: OpenCodeEvent | null; + /** Event type classification */ + eventType: EventType; + /** Whether parsing succeeded */ + isValid: boolean; + /** Error message if parsing failed */ + parseError?: string; + /** Line number in source file */ + lineNumber?: number; +} + +export interface ToolUseDetails { + tool: ToolType; + status: string; + description: string; + filePath?: string; + pattern?: string; + command?: string; + duration?: number; + output?: string; + metadata?: Record; +} + +export interface StepDetails { + stepId: string; + sessionId: string; + reason?: string; + tokens?: { + input: number; + output: number; + reasoning?: number; + cache?: { read: number; write: number }; + }; + cost?: number; + duration?: number; +} + +// ============================================================================= +// Event Detection & Classification +// ============================================================================= + +/** + * Detect event type from a JSON object + */ +export function detectEventType(obj: unknown): EventType { + if (typeof obj !== "object" || obj === null) { + return "unknown"; + } + + const type = (obj as Record).type; + if (typeof type !== "string") { + return "unknown"; + } + + switch (type) { + case "tool_use": + return "tool_use"; + case "step_start": + return "step_start"; + case "step_finish": + return "step_finish"; + case "text": + return "text"; + case "error": + return "error"; + default: + return "unknown"; + } +} + +/** + * Detect tool type from tool_use event + */ +export function detectToolType(event: ToolUseEvent): ToolType { + return event.part.tool || "unknown"; +} + +/** + * Get tool use details for UI display + */ +export function getToolUseDetails(event: ToolUseEvent): ToolUseDetails { + const part = event.part; + const state = part.state; + const tool = part.tool; + + let description = `${tool}: `; + let filePath: string | undefined; + let pattern: string | undefined; + let command: string | undefined; + + switch (tool) { + case "read": + filePath = state.input?.filePath || state.input?.file_path; + description += `Reading ${filePath ? truncatePath(filePath) : "file"}`; + break; + case "write": + filePath = state.input?.filePath || state.input?.file_path; + description += `Writing ${filePath ? truncatePath(filePath) : "file"}`; + break; + case "edit": + filePath = state.input?.filePath || state.input?.file_path; + description += `Editing ${filePath ? truncatePath(filePath) : "file"}`; + break; + case "glob": + pattern = state.input?.pattern; + description += `Searching pattern "${pattern || "unknown"}"`; + break; + case "grep": + pattern = state.input?.pattern || state.input?.query; + description += `Grep "${pattern || "unknown"}"`; + break; + case "bash": + command = state.input?.command; + description += `Running: ${command ? truncateCommand(command) : "shell command"}`; + break; + case "list": + filePath = state.input?.path; + description += `Listing directory ${filePath || "."}`; + break; + default: + description += `Executing ${tool}`; + } + + if (state.status === "completed") { + description += " ✓"; + } else if (state.status === "failed") { + description += " ✗"; + } else if (state.status === "in_progress") { + description += " ..."; + } + + const duration = state.time ? state.time.end - state.time.start : undefined; + + return { + tool, + status: state.status, + description, + filePath, + pattern, + command, + duration, + output: typeof state.output === "string" ? state.output : undefined, + metadata: state.metadata, + }; +} + +/** + * Get step details for UI display + */ +export function getStepDetails(event: StepStartEvent | StepFinishEvent): StepDetails { + const isFinish = event.type === "step_finish"; + const part = event.part; + + return { + stepId: part.id, + sessionId: part.sessionID, + reason: isFinish ? (event as StepFinishEvent).part.reason : undefined, + tokens: isFinish ? (event as StepFinishEvent).part.tokens : undefined, + cost: isFinish ? (event as StepFinishEvent).part.cost : undefined, + }; +} + +// ============================================================================= +// Parsing Functions +// ============================================================================= + +/** + * Parse a single line of OpenCode JSON output + */ +export function parseOpenCodeLine(line: string, lineNumber?: number): ParsedEvent { + const trimmed = line.trim(); + + if (!trimmed) { + return { + raw: line, + event: null, + eventType: "unknown", + isValid: false, + lineNumber, + }; + } + + // Handle [RAW OPENCODE OUTPUT] prefix + const prefix = "[RAW OPENCODE OUTPUT] "; + const jsonStr = trimmed.startsWith(prefix) ? trimmed.slice(prefix.length) : trimmed; + + try { + const obj = JSON.parse(jsonStr); + const eventType = detectEventType(obj); + + // Validate with Zod schema + const parseResult = OpenCodeEventSchema.safeParse(obj); + + if (parseResult.success) { + return { + raw: line, + event: parseResult.data, + eventType, + isValid: true, + lineNumber, + }; + } + + // If Zod validation fails but we have a known type, keep the parsed object + // for diagnostics while marking the line as invalid. + if (eventType !== "unknown") { + return { + raw: line, + event: obj as OpenCodeEvent, + eventType, + isValid: false, + parseError: parseResult.error?.message, + lineNumber, + }; + } + + return { + raw: line, + event: obj as OpenCodeEvent, + eventType: "unknown", + isValid: false, + parseError: parseResult.error?.message, + lineNumber, + }; + } catch (error) { + return { + raw: line, + event: null, + eventType: "unknown", + isValid: false, + parseError: error instanceof Error ? error.message : "Unknown parse error", + lineNumber, + }; + } +} + +/** + * Parse entire OpenCode log content + */ +export function parseOpenCodeLog(content: string): ParsedEvent[] { + const lines = content.split("\n"); + return lines.map((line, index) => parseOpenCodeLine(line, index + 1)); +} + +// ============================================================================= +// Filtering Functions +// ============================================================================= + +export interface FilterOptions { + /** Filter by event types */ + eventTypes?: EventType[]; + /** Filter by tool types (for tool_use events) */ + tools?: ToolType[]; + /** Filter by tool status */ + status?: string[]; + /** Include only events with errors */ + onlyErrors?: boolean; + /** Filter by session ID */ + sessionId?: string; + /** Filter by file path (for read/write/edit events) */ + filePath?: string; + /** Search in output/content */ + searchText?: string; +} + +/** + * Filter parsed events based on options + */ +export function filterEvents(events: ParsedEvent[], options: FilterOptions): ParsedEvent[] { + return events.filter((parsed) => { + if (!parsed.isValid || !parsed.event) { + return false; + } + + // Filter by event type + if (options.eventTypes && options.eventTypes.length > 0) { + if (!options.eventTypes.includes(parsed.eventType)) { + return false; + } + } + + // Filter by session ID + if (options.sessionId) { + const event = parsed.event as Record; + if (event.sessionID !== options.sessionId && event.sessionId !== options.sessionId) { + return false; + } + } + + // Filter tool_use events + if (parsed.eventType === "tool_use") { + const toolEvent = parsed.event as ToolUseEvent; + + // Filter by tool type + if (options.tools && options.tools.length > 0) { + if (!options.tools.includes(toolEvent.part.tool)) { + return false; + } + } + + // Filter by status + if (options.status && options.status.length > 0) { + if (!options.status.includes(toolEvent.part.state.status)) { + return false; + } + } + + // Filter by file path + const filterPath = options.filePath; + if (filterPath) { + const input = toolEvent.part.state.input || {}; + const filePaths = [input.filePath, input.file_path, input.path, input.pattern].filter( + Boolean, + ); + + if (!filePaths.some((fp) => fp?.includes(filterPath))) { + return false; + } + } + } + + // Filter errors + if (options.onlyErrors) { + if (parsed.eventType === "error") { + return true; + } + if (parsed.eventType === "tool_use") { + const toolEvent = parsed.event as ToolUseEvent; + if (toolEvent.part.state.status !== "failed") { + return false; + } + } + } + + // Search text + if (options.searchText) { + const searchLower = options.searchText.toLowerCase(); + const textToSearch = parsed.raw.toLowerCase(); + if (!textToSearch.includes(searchLower)) { + return false; + } + } + + return true; + }); +} + +// ============================================================================= +// Formatting Utilities +// ============================================================================= + +/** + * Truncate a file path for display + */ +function truncatePath(path: string, maxLength = 40): string { + if (path.length <= maxLength) return path; + const parts = path.split(/[/\\]/); + if (parts.length <= 2) return `...${path.slice(-maxLength + 3)}`; + return `.../${parts.slice(-2).join("/")}`; +} + +/** + * Truncate a command for display + */ +function truncateCommand(command: string, maxLength = 50): string { + if (command.length <= maxLength) return command; + return `${command.slice(0, maxLength - 3)}...`; +} + +/** + * Format duration in milliseconds to human readable string + */ +export function formatDuration(ms: number): string { + if (ms < 1000) return `${ms}ms`; + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; + return `${(ms / 60000).toFixed(1)}m`; +} + +/** + * Format token count with commas + */ +export function formatTokens(count: number): string { + return count.toLocaleString(); +} + +/** + * Get a human-readable summary of a parsed event for UI display + */ +export function getEventSummary(parsed: ParsedEvent): string { + if (!parsed.isValid || !parsed.event) { + return "Invalid or unparsable line"; + } + + switch (parsed.eventType) { + case "tool_use": { + const details = getToolUseDetails(parsed.event as ToolUseEvent); + return details.description; + } + case "step_start": { + const details = getStepDetails(parsed.event as StepStartEvent); + return `Step started (${truncateId(details.stepId)})`; + } + case "step_finish": { + const details = getStepDetails(parsed.event as StepFinishEvent); + let summary = `Step finished (${truncateId(details.stepId)})`; + if (details.tokens) { + summary += ` - ${formatTokens(details.tokens.input)} → ${formatTokens(details.tokens.output)} tokens`; + } + if (details.cost !== undefined) { + summary += ` - $${details.cost.toFixed(4)}`; + } + return summary; + } + case "text": { + const text = (parsed.event as TextEvent).part.text; + // Check for structured content sections + const structuredSummary = extractStructuredSummary(text); + if (structuredSummary) { + return structuredSummary; + } + return truncateText(text, 60); + } + case "error": { + const error = parsed.event as ErrorEvent; + return `Error: ${error.message || error.error?.message || "Unknown error"}`; + } + default: + return "Unknown event type"; + } +} + +/** + * Truncate text with ellipsis + */ +function truncateText(text: string, maxLength: number): string { + if (text.length <= maxLength) return text; + return `${text.slice(0, maxLength - 3)}...`; +} + +/** + * Extract a summary from structured text content with sections like , , , + */ +export function extractStructuredSummary(text: string): string | null { + // Check for structured sections + const hasAnalysis = text.includes(""); + const hasPlan = text.includes(""); + const hasFiles = text.includes(""); + const hasOptimization = text.includes(""); + + if (!hasAnalysis && !hasPlan && !hasFiles && !hasOptimization) { + return null; + } + + const parts: string[] = []; + + // Extract problem from ANALYSIS section + if (hasAnalysis) { + const problemMatch = text.match(/\s*\n?\s*-\s*Problem:\s*([^\n]+)/i); + if (problemMatch) { + parts.push(`ANALYSIS: ${problemMatch[1].trim()}`); + } + } + + // Extract plan steps count + if (hasPlan) { + const planSteps = text.match(/\d+\./g); + if (planSteps) { + parts.push(`PLAN: ${planSteps.length} steps`); + } + } + + // Extract file count + if (hasFiles) { + const fileMatches = text.match(/[\w/]+\.(gd|cs|ts|js|py|yaml|json|md|txt)/gi); + if (fileMatches) { + const uniqueFiles = new Set(fileMatches); + parts.push(`FILES: ${uniqueFiles.size} files`); + } + } + + // Extract optimization approach + if (hasOptimization) { + const approachMatch = text.match(/-\s*Most efficient approach:\s*([^\n]+)/i); + if (approachMatch) { + parts.push(`OPTIMIZATION: ${approachMatch[1].trim()}`); + } + } + + return parts.length > 0 ? parts.join(" | ") : null; +} + +/** + * Truncate ID for display + */ +function truncateId(id: string): string { + if (id.length <= 12) return id; + return `${id.slice(0, 6)}...${id.slice(-6)}`; +} + +// ============================================================================= +// Session Analysis +// ============================================================================= + +export interface SessionSummary { + sessionId: string; + startTime?: number; + endTime?: number; + stepCount: number; + toolUseCount: number; + totalTokens: { input: number; output: number; reasoning?: number }; + totalCost: number; + toolsUsed: Map; + errors: string[]; +} + +/** + * Analyze a session from parsed events + */ +export function analyzeSession(events: ParsedEvent[], sessionId: string): SessionSummary { + const summary: SessionSummary = { + sessionId, + stepCount: 0, + toolUseCount: 0, + totalTokens: { input: 0, output: 0, reasoning: 0 }, + totalCost: 0, + toolsUsed: new Map(), + errors: [], + }; + + for (const parsed of events) { + if (!parsed.isValid || !parsed.event) continue; + + // Check if event belongs to this session + const event = parsed.event as Record; + if (event.sessionID !== sessionId && event.sessionId !== sessionId) { + continue; + } + + // Track timestamps + if (typeof event.timestamp === "number") { + if (!summary.startTime || event.timestamp < summary.startTime) { + summary.startTime = event.timestamp; + } + if (!summary.endTime || event.timestamp > summary.endTime) { + summary.endTime = event.timestamp; + } + } + + switch (parsed.eventType) { + case "step_start": + summary.stepCount++; + break; + case "step_finish": { + const finishEvent = parsed.event as StepFinishEvent; + if (finishEvent.part.tokens) { + summary.totalTokens.input += finishEvent.part.tokens.input || 0; + summary.totalTokens.output += finishEvent.part.tokens.output || 0; + summary.totalTokens.reasoning = + (summary.totalTokens.reasoning || 0) + (finishEvent.part.tokens.reasoning || 0); + } + if (finishEvent.part.cost !== undefined) { + summary.totalCost += finishEvent.part.cost; + } + break; + } + case "tool_use": { + summary.toolUseCount++; + const toolEvent = parsed.event as ToolUseEvent; + const tool = toolEvent.part.tool; + summary.toolsUsed.set(tool, (summary.toolsUsed.get(tool) || 0) + 1); + break; + } + case "error": { + const errorEvent = parsed.event as ErrorEvent; + summary.errors.push(errorEvent.message || errorEvent.error?.message || "Unknown error"); + break; + } + } + } + + return summary; +} + +/** + * Extract all session IDs from parsed events + */ +export function extractSessionIds(events: ParsedEvent[]): string[] { + const ids = new Set(); + for (const parsed of events) { + if (parsed.isValid && parsed.event) { + const event = parsed.event as Record; + const sessionId = event.sessionID || event.sessionId; + if (typeof sessionId === "string") { + ids.add(sessionId); + } + } + } + return Array.from(ids); +} diff --git a/cli/src/utils/resource-manager.ts b/cli/src/utils/resource-manager.ts new file mode 100644 index 00000000..fdac98c5 --- /dev/null +++ b/cli/src/utils/resource-manager.ts @@ -0,0 +1,454 @@ +/** + * ResourceManager - Manages system resources including files, memory, and cleanup operations + * Provides centralized resource tracking, cleanup, and error handling + */ + +import type { ChildProcess } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import { + existsSync, + mkdirSync, + readdirSync, + rmSync, + rmdirSync, + statSync, + unlinkSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { registerCleanup, registerProcess } from "./cleanup.ts"; +import { RalphyError, standardizeError } from "./errors.ts"; + +/** + * Generate a cryptographically secure random string for resource IDs + */ +function generateSecureId(): string { + return randomBytes(8).toString("hex"); +} + +export class ResourceError extends RalphyError { + constructor(message: string, context?: Record) { + super(message, "RESOURCE_ERROR", context); + this.name = "ResourceError"; + } +} + +interface ResourceInfo { + id: string; + type: "file" | "directory" | "process" | "memory" | "temp"; + path?: string; + process?: ChildProcess; + data?: unknown; + created: Date; + lastAccessed: Date; + size?: number; + cleanup?: () => void | Promise; +} + +interface ResourceStats { + totalResources: number; + filesTracked: number; + processesTracked: number; + tempDirectories: number; + totalDiskUsage: number; + oldestResource: Date | null; +} + +/** + * ResourceManager - Centralized resource management with automatic cleanup + */ +export class ResourceManager { + private resources: Map = new Map(); + private maxMemoryUsage: number; + private maxTempFileSize: number; + private cleanupInterval: number; + private intervalId?: NodeJS.Timeout; + + constructor( + options: { + maxMemoryUsage?: number; + maxTempFileSize?: number; + cleanupInterval?: number; + } = {}, + ) { + this.maxMemoryUsage = options.maxMemoryUsage ?? 100 * 1024 * 1024; // 100MB + this.maxTempFileSize = options.maxTempFileSize ?? 10 * 1024 * 1024; // 10MB + this.cleanupInterval = options.cleanupInterval ?? 60000; // 1 minute + + // Register cleanup handler + registerCleanup(() => this.cleanup()); + + // Start periodic cleanup + this.startPeriodicCleanup(); + } + + /** + * Create a temporary directory and track it for cleanup + * Uses system temp directory for security (not process.cwd()) + */ + createTempDir(prefix = "ralphy-temp"): string { + // Use system temp directory for security, with cryptographically secure random suffix + const tempDir = join(tmpdir(), `${prefix}-${Date.now()}-${generateSecureId()}`); + + try { + mkdirSync(tempDir, { recursive: true }); + + const resourceId = `temp-dir-${generateSecureId()}`; + this.resources.set(resourceId, { + id: resourceId, + type: "directory", + path: tempDir, + created: new Date(), + lastAccessed: new Date(), + cleanup: () => { + if (existsSync(tempDir)) { + this.removeDirectory(tempDir); + } + }, + }); + + return tempDir; + } catch (error) { + throw new ResourceError(`Failed to create temp directory: ${tempDir}`, { + tempDir, + error: standardizeError(error), + }); + } + } + + /** + * Create a temporary file and track it for cleanup + * Uses system temp directory for security (not process.cwd()) + */ + createTempFile(content: string | Buffer, prefix = "ralphy-temp", extension = ".tmp"): string { + // Use system temp directory for security, with cryptographically secure random suffix + const tempFile = join(tmpdir(), `${prefix}-${Date.now()}-${generateSecureId()}${extension}`); + + try { + // Check file size limits + const contentSize = + typeof content === "string" ? Buffer.byteLength(content, "utf-8") : content.length; + if (contentSize > this.maxTempFileSize) { + throw new ResourceError( + `Temp file size exceeds limit: ${contentSize} > ${this.maxTempFileSize}`, + ); + } + + writeFileSync(tempFile, content); + + const resourceId = `temp-file-${generateSecureId()}`; + this.resources.set(resourceId, { + id: resourceId, + type: "file", + path: tempFile, + created: new Date(), + lastAccessed: new Date(), + size: contentSize, + cleanup: () => { + if (existsSync(tempFile)) { + unlinkSync(tempFile); + } + }, + }); + + return tempFile; + } catch (error) { + throw new ResourceError(`Failed to create temp file: ${tempFile}`, { + tempFile, + error: standardizeError(error), + }); + } + } + + /** + * Track a file resource for management + */ + trackFile(filePath: string, autoCleanup = false): string { + const resolvedPath = resolve(filePath); + + try { + if (!existsSync(resolvedPath)) { + throw new ResourceError(`File does not exist: ${resolvedPath}`); + } + + const stats = statSync(resolvedPath); + const resourceId = `file-${Date.now()}-${generateSecureId()}`; + + this.resources.set(resourceId, { + id: resourceId, + type: "file", + path: resolvedPath, + created: new Date(), + lastAccessed: new Date(), + size: stats.size, + cleanup: autoCleanup + ? () => { + if (existsSync(resolvedPath)) { + unlinkSync(resolvedPath); + } + } + : undefined, + }); + + return resourceId; + } catch (error) { + throw new ResourceError(`Failed to track file: ${resolvedPath}`, { + resolvedPath, + error: standardizeError(error), + }); + } + } + + /** + * Track a process resource for management + */ + trackProcess(process: ChildProcess): string { + const resourceId = `process-${Date.now()}-${generateSecureId()}`; + + // Register with global cleanup system + const removeCleanup = registerProcess(process); + + this.resources.set(resourceId, { + id: resourceId, + type: "process", + process, + created: new Date(), + lastAccessed: new Date(), + cleanup: removeCleanup, + }); + + return resourceId; + } + + /** + * Track in-memory data resource + */ + trackMemory(data: unknown, _description = ""): string { + const resourceId = `memory-${Date.now()}-${generateSecureId()}`; + + // Estimate memory usage (rough approximation) + let size = 0; + try { + size = JSON.stringify(data).length * 2; // UTF-16 bytes + } catch { + // Non-serializable values (e.g. circular refs, BigInt) are allowed; skip size estimate. + } + + this.resources.set(resourceId, { + id: resourceId, + type: "memory", + data, + created: new Date(), + lastAccessed: new Date(), + size, + }); + + return resourceId; + } + + /** + * Get tracked resource information + */ + getResource(resourceId: string): ResourceInfo | undefined { + const resource = this.resources.get(resourceId); + if (resource) { + resource.lastAccessed = new Date(); + } + return resource; + } + + /** + * Remove a specific resource and trigger its cleanup + */ + async removeResource(resourceId: string): Promise { + const resource = this.resources.get(resourceId); + if (!resource) { + return; + } + + try { + if (resource.cleanup) { + await resource.cleanup(); + } + this.resources.delete(resourceId); + } catch (error) { + throw new ResourceError(`Failed to cleanup resource: ${resourceId}`, { + resourceId, + error: standardizeError(error), + }); + } + } + + /** + * Get resource statistics + */ + getStats(): ResourceStats { + const resources = Array.from(this.resources.values()); + const filesTracked = resources.filter((r) => r.type === "file").length; + const processesTracked = resources.filter((r) => r.type === "process").length; + const tempDirectories = resources.filter((r) => r.type === "directory").length; + const totalDiskUsage = resources + .filter((r) => r.size) + .reduce((sum, r) => sum + (r.size ?? 0), 0); + const oldestResource = + resources.length > 0 + ? new Date(Math.min(...resources.map((r) => r.created.getTime()))) + : null; + + return { + totalResources: resources.length, + filesTracked, + processesTracked, + tempDirectories, + totalDiskUsage, + oldestResource, + }; + } + + /** + * Clean up old or expired resources + */ + async cleanup(options: { maxAge?: number; force?: boolean } = {}): Promise { + const { maxAge = 30 * 60 * 1000, force = false } = options; // 30 minutes default + const now = Date.now(); + const resourcesToRemove: string[] = []; + + for (const [id, resource] of this.resources) { + const age = now - resource.created.getTime(); + + if (force || age > maxAge) { + resourcesToRemove.push(id); + } + } + + // Remove resources in parallel + await Promise.allSettled(resourcesToRemove.map((id) => this.removeResource(id))); + } + + /** + * Force cleanup of all resources + */ + async cleanupAll(): Promise { + const resourceIds = Array.from(this.resources.keys()); + await Promise.allSettled(resourceIds.map((id) => this.removeResource(id))); + } + + /** + * Check memory usage and cleanup if necessary + */ + private checkMemoryUsage(): void { + const stats = this.getStats(); + if (stats.totalDiskUsage > this.maxMemoryUsage) { + // Clean up oldest resources first + void this.cleanup({ maxAge: 10 * 60 * 1000 }).catch((err) => { + console.error("Memory-triggered cleanup failed:", err); + }); // 10 minutes + } + } + + /** + * Start periodic cleanup + */ + private startPeriodicCleanup(): void { + this.intervalId = setInterval(() => { + this.checkMemoryUsage(); + this.cleanup().catch((err) => { + console.error("Cleanup failed:", err); + }); + }, this.cleanupInterval); + } + + /** + * Stop periodic cleanup + */ + stopPeriodicCleanup(): void { + if (this.intervalId) { + clearInterval(this.intervalId); + this.intervalId = undefined; + } + } + + /** + * Recursively remove a directory + */ + private removeDirectory(dirPath: string): void { + try { + rmSync(dirPath, { recursive: true, force: true }); + } catch (_error) { + // Fallback for older Node.js versions + try { + const files = readdirSync(dirPath); + for (const file of files) { + const filePath = join(dirPath, file); + const stats = statSync(filePath); + if (stats.isDirectory()) { + this.removeDirectory(filePath); + } else { + unlinkSync(filePath); + } + } + rmdirSync(dirPath); + } catch (_fallbackError) { + // Fallback cleanup failed, ignore to prevent blocking + } + } + } + + /** + * Get list of all tracked resources + */ + listResources(): ResourceInfo[] { + return Array.from(this.resources.values()); + } + + /** + * Check if a resource exists and is accessible + */ + validateResource(resourceId: string): boolean { + const resource = this.resources.get(resourceId); + if (!resource) { + return false; + } + + switch (resource.type) { + case "file": + return resource.path ? existsSync(resource.path) : false; + case "process": + return resource.process ? Boolean(resource.process.pid) : false; + case "directory": + return resource.path ? existsSync(resource.path) : false; + case "memory": + case "temp": + return true; // Always valid unless explicitly removed + default: + return false; + } + } +} + +// Global instance for singleton usage +let globalResourceManager: ResourceManager | undefined; + +/** + * Get or create the global ResourceManager instance + */ +export function getResourceManager(): ResourceManager { + if (!globalResourceManager) { + globalResourceManager = new ResourceManager(); + } + return globalResourceManager; +} + +/** + * Reset the global ResourceManager (useful for testing) + */ +export function resetResourceManager(): void { + if (globalResourceManager) { + globalResourceManager.stopPeriodicCleanup(); + globalResourceManager.cleanupAll().catch((err) => { + console.error("Global cleanup failed:", err); + }); + globalResourceManager = undefined; + } +} diff --git a/cli/src/utils/sanitization.ts b/cli/src/utils/sanitization.ts new file mode 100644 index 00000000..6ea41ea4 --- /dev/null +++ b/cli/src/utils/sanitization.ts @@ -0,0 +1,63 @@ +/** + * Sanitization utilities for removing sensitive data + * + * SECURITY: All patterns use bounded quantifiers to prevent ReDoS attacks + */ + +/** + * Maximum input length for secret sanitization to prevent ReDoS + */ +const MAX_SANITIZE_INPUT_LENGTH = 1000000; // 1MB + +function truncateToMaxBytes(input: string, maxBytes: number): string { + if (Buffer.byteLength(input, "utf8") <= maxBytes) { + return input; + } + + let end = Math.min(input.length, maxBytes); + while (end > 0 && Buffer.byteLength(input.slice(0, end), "utf8") > maxBytes) { + end--; + } + + return input.slice(0, end); +} + +/** + * Sanitize sensitive data (API keys, passwords, etc.) from string input + * + * SECURITY NOTE: This function includes protections against ReDoS attacks: + * - Input length is limited to MAX_SANITIZE_INPUT_LENGTH + * - All regex patterns use bounded quantifiers (e.g., {48}, {36}) + * - Patterns are applied sequentially with early exit if input becomes too large + * + * @param input - The string to sanitize + * @returns Sanitized string with secrets redacted + */ +export function sanitizeSecrets(input: string): string { + // Limit input length to prevent ReDoS attacks + if (Buffer.byteLength(input, "utf8") > MAX_SANITIZE_INPUT_LENGTH) { + // For very large inputs, truncate and add warning + const truncated = truncateToMaxBytes(input, MAX_SANITIZE_INPUT_LENGTH); + return `${truncated}\n\n[WARNING: Content truncated due to size limits during secret sanitization]`; + } + + // All patterns use bounded quantifiers to prevent ReDoS + // Patterns are designed to match specific token formats with fixed lengths + const patterns = [ + { regex: /sk-[a-zA-Z0-9]{48}/g, replacement: "[API_KEY_REDACTED]" }, + { regex: /sk-ant-[a-zA-Z0-9_-]{16,256}/g, replacement: "[ANTHROPIC_KEY_REDACTED]" }, + { regex: /ghp_[a-zA-Z0-9]{36}/g, replacement: "[GITHUB_TOKEN_REDACTED]" }, + { regex: /gho_[a-zA-Z0-9]{52}/g, replacement: "[GITHUB_OAUTH_REDACTED]" }, + { regex: /AKIA[0-9A-Z]{16}/g, replacement: "[AWS_KEY_REDACTED]" }, + // For hex secrets, use a bounded length and require word boundaries to prevent + // matching large hex strings that could cause performance issues + { regex: /\b[0-9a-f]{64}\b/g, replacement: "[HEX_SECRET_REDACTED]" }, + ]; + + let result = input; + for (const { regex, replacement } of patterns) { + result = result.replace(regex, replacement); + } + + return result; +} diff --git a/cli/src/utils/templates.ts b/cli/src/utils/templates.ts new file mode 100644 index 00000000..a02961ee --- /dev/null +++ b/cli/src/utils/templates.ts @@ -0,0 +1,515 @@ +/** + * Prompt Template System for Ralphy CLI + * + * Provides customizable prompt building with: + * - Template variables + * - Conditional sections + * - Template inheritance + * - Multiple output formats + */ + +import { logDebugContext, logErrorContext } from "../ui/logger.ts"; + +const MAX_TEMPLATE_PATTERN_LENGTH = 200; +const MAX_TEMPLATE_VALUE_LENGTH = 2000; + +function isUnsafeTemplateRegex(pattern: string): boolean { + // Reject advanced features and nested quantifier forms that are common ReDoS vectors. + if (/\\\d/.test(pattern)) return true; + if (/\(\?(?:[:=!<])/.test(pattern)) return true; + if (/\((?:[^()\\]|\\.)*[+*][^)]*\)[+*{]/.test(pattern)) return true; + if (/\([^)]*\|[^)]*\)[+*{]/.test(pattern)) return true; + return false; +} + +function hasOwnContextValue(context: TemplateContext, key: string): boolean { + return Object.prototype.hasOwnProperty.call(context, key); +} + +/** + * Template variable definition + */ +export interface TemplateVariable { + /** Variable name */ + name: string; + /** Variable description */ + description: string; + /** Default value */ + default?: string; + /** Whether variable is required */ + required?: boolean; + /** Validation regex pattern */ + pattern?: string; +} + +/** + * Template section + */ +export interface TemplateSection { + /** Section name */ + name: string; + /** Section content (can include variables) */ + content: string; + /** Condition to include this section */ + condition?: string; + /** Section priority (for ordering) */ + priority?: number; +} + +/** + * Prompt template definition + */ +export interface PromptTemplate { + /** Template name */ + name: string; + /** Template description */ + description: string; + /** Template version */ + version: string; + /** Variables used in template */ + variables: TemplateVariable[]; + /** Template sections */ + sections: TemplateSection[]; + /** Base template to extend (optional) */ + extends?: string; + /** Output format */ + outputFormat?: "text" | "markdown" | "json"; +} + +/** + * Template context with variable values + */ +export interface TemplateContext { + [variableName: string]: string | number | boolean | string[]; +} + +/** + * Rendered prompt result + */ +export interface RenderedPrompt { + /** Rendered prompt text */ + prompt: string; + /** Variables that were used */ + usedVariables: string[]; + /** Sections that were included */ + includedSections: string[]; + /** Template metadata */ + metadata: { + templateName: string; + version: string; + renderedAt: string; + }; +} + +/** + * Template engine for building prompts + */ +export class TemplateEngine { + private templates: Map = new Map(); + private parentEngine?: TemplateEngine; + + constructor(parent?: TemplateEngine) { + this.parentEngine = parent; + } + + /** + * Register a template + */ + register(template: PromptTemplate): void { + // Validate template + this.validateTemplate(template); + + this.templates.set(template.name, template); + logDebugContext("TemplateEngine", `Registered template: ${template.name} v${template.version}`); + } + + /** + * Get a template by name + */ + getTemplate(name: string): PromptTemplate | undefined { + return this.templates.get(name) || this.parentEngine?.getTemplate(name); + } + + /** + * Check if template exists + */ + hasTemplate(name: string): boolean { + return this.templates.has(name) || (this.parentEngine?.hasTemplate(name) ?? false); + } + + /** + * Render a template with context + */ + render(templateName: string, context: TemplateContext): RenderedPrompt { + const template = this.getTemplate(templateName); + if (!template) { + throw new Error(`Template not found: ${templateName}`); + } + + // If template extends another, merge with parent + let effectiveTemplate = template; + if (template.extends) { + const parentTemplate = this.getTemplate(template.extends); + if (parentTemplate) { + effectiveTemplate = this.mergeTemplates(parentTemplate, template); + } + } + + // Validate context + this.validateContext(effectiveTemplate, context); + + // Render sections + const includedSections: string[] = []; + const renderedSections: string[] = []; + + // Sort sections by priority + const sortedSections = [...effectiveTemplate.sections].sort( + (a, b) => (a.priority ?? 0) - (b.priority ?? 0), + ); + + for (const section of sortedSections) { + // Check condition + if (section.condition && !this.evaluateCondition(section.condition, context)) { + continue; + } + + includedSections.push(section.name); + + // Render variables in section content + let renderedContent = this.renderVariables(section.content, context); + + // Apply output format + renderedContent = this.applyOutputFormat(renderedContent, effectiveTemplate.outputFormat); + + renderedSections.push(renderedContent); + } + + const usedVariables = effectiveTemplate.variables.map((v) => v.name); + + return { + prompt: renderedSections.join("\n\n"), + usedVariables, + includedSections, + metadata: { + templateName: template.name, + version: template.version, + renderedAt: new Date().toISOString(), + }, + }; + } + + /** + * Render a template string directly with context + */ + renderString(templateString: string, context: TemplateContext): string { + return this.renderVariables(templateString, context); + } + + /** + * Get all registered template names + */ + getTemplateNames(): string[] { + const names = Array.from(this.templates.keys()); + if (this.parentEngine) { + const parentNames = this.parentEngine.getTemplateNames(); + return [...new Set([...names, ...parentNames])]; + } + return names; + } + + /** + * Unregister a template + */ + unregister(name: string): boolean { + const deleted = this.templates.delete(name); + if (deleted) { + logDebugContext("TemplateEngine", `Unregistered template: ${name}`); + } + return deleted; + } + + /** + * Clear all templates + */ + clear(): void { + this.templates.clear(); + logDebugContext("TemplateEngine", "All templates cleared"); + } + + // Private helpers + + private validateTemplate(template: PromptTemplate): void { + if (!template.name) { + throw new Error("Template must have a name"); + } + if (!template.sections || template.sections.length === 0) { + throw new Error(`Template ${template.name} must have at least one section`); + } + + // Check for duplicate variable names + const varNames = new Set(); + for (const variable of template.variables) { + if (varNames.has(variable.name)) { + throw new Error(`Duplicate variable name in template ${template.name}: ${variable.name}`); + } + varNames.add(variable.name); + } + + // Validate variable references in sections + for (const section of template.sections) { + const varRefs = this.extractVariableReferences(section.content); + for (const ref of varRefs) { + if (!varNames.has(ref)) { + logErrorContext( + "TemplateEngine", + `Template ${template.name} references undefined variable: ${ref}`, + ); + } + } + } + } + + private validateContext(template: PromptTemplate, context: TemplateContext): void { + for (const variable of template.variables) { + if (variable.required && !hasOwnContextValue(context, variable.name)) { + throw new Error( + `Missing required variable '${variable.name}' for template '${template.name}'`, + ); + } + + if (variable.pattern && hasOwnContextValue(context, variable.name)) { + const value = String(context[variable.name]); + if (variable.pattern.length > MAX_TEMPLATE_PATTERN_LENGTH) { + throw new Error(`Variable '${variable.name}' pattern is too long`); + } + if (isUnsafeTemplateRegex(variable.pattern)) { + throw new Error(`Variable '${variable.name}' pattern uses unsafe regex constructs`); + } + if (value.length > MAX_TEMPLATE_VALUE_LENGTH) { + throw new Error(`Variable '${variable.name}' value is too long for regex validation`); + } + let regex: RegExp; + try { + regex = new RegExp(variable.pattern); + } catch { + throw new Error(`Variable '${variable.name}' has invalid pattern '${variable.pattern}'`); + } + if (!regex.test(value)) { + throw new Error( + `Variable '${variable.name}' value '${value}' does not match pattern '${variable.pattern}'`, + ); + } + } + } + } + + private mergeTemplates(parent: PromptTemplate, child: PromptTemplate): PromptTemplate { + // Merge sections (child overrides parent with same name) + const parentSections = new Map(parent.sections.map((s) => [s.name, s])); + const mergedSections: TemplateSection[] = []; + + for (const section of child.sections) { + parentSections.set(section.name, section); + } + + for (const section of parentSections.values()) { + mergedSections.push(section); + } + + // Merge variables (child overrides parent with same name) + const parentVars = new Map(parent.variables.map((v) => [v.name, v])); + for (const variable of child.variables) { + parentVars.set(variable.name, variable); + } + + return { + name: child.name, + description: child.description || parent.description, + version: child.version, + variables: Array.from(parentVars.values()), + sections: mergedSections, + outputFormat: child.outputFormat || parent.outputFormat, + }; + } + + private extractVariableReferences(content: string): string[] { + const regex = /\{\{(\w+)\}\}/g; + const matches: string[] = []; + let match: RegExpExecArray | null = null; + + while (true) { + match = regex.exec(content); + if (match === null) break; + matches.push(match[1]); + } + + return matches; + } + + private renderVariables(content: string, context: TemplateContext): string { + return content.replace(/\{\{(\w+)\}\}/g, (match, varName) => { + if (hasOwnContextValue(context, varName)) { + const value = context[varName]; + if (Array.isArray(value)) { + return value.join("\n"); + } + return String(value); + } + return match; // Keep original if not found + }); + } + + private evaluateCondition(condition: string, context: TemplateContext): boolean { + // Simple condition evaluation + // Supports: variable, !variable, variable=value, variable!=value + const normalizedCondition = condition.trim(); + + // Negation + if (normalizedCondition.startsWith("!")) { + const varName = normalizedCondition.slice(1); + return hasOwnContextValue(context, varName) ? !context[varName] : true; + } + + // Inequality check + const neqMatch = normalizedCondition.match(/^(\w+)!=(.+)$/); + if (neqMatch) { + const [, varName, expectedValue] = neqMatch; + if (!hasOwnContextValue(context, varName)) { + return true; + } + return String(context[varName]) !== expectedValue; + } + + // Equality check + const eqMatch = normalizedCondition.match(/^(\w+)=(.+)$/); + if (eqMatch) { + const [, varName, expectedValue] = eqMatch; + if (!hasOwnContextValue(context, varName)) { + return false; + } + return String(context[varName]) === expectedValue; + } + + // Simple truthy check + return hasOwnContextValue(context, normalizedCondition) && !!context[normalizedCondition]; + } + + private applyOutputFormat(content: string, format?: string): string { + switch (format) { + case "markdown": + // Ensure proper markdown formatting + return content.trim(); + case "json": + // Escape for JSON + return JSON.stringify(content).slice(1, -1); + default: + return content; + } + } +} + +/** + * Global template engine instance + */ +let globalTemplateEngine: TemplateEngine = new TemplateEngine(); + +/** + * Set global template engine + */ +export function setTemplateEngine(engine: TemplateEngine): void { + globalTemplateEngine = engine; +} + +/** + * Get global template engine + */ +export function getTemplateEngine(): TemplateEngine { + return globalTemplateEngine; +} + +/** + * Register a template (convenience function) + */ +export function registerTemplate(template: PromptTemplate): void { + globalTemplateEngine.register(template); +} + +/** + * Render a template (convenience function) + */ +export function renderTemplate(templateName: string, context: TemplateContext): RenderedPrompt { + return globalTemplateEngine.render(templateName, context); +} + +/** + * Built-in templates for common use cases + */ +export const builtInTemplates: PromptTemplate[] = [ + { + name: "default-task", + description: "Default template for task execution", + version: "1.0.0", + variables: [ + { name: "task", description: "Task description", required: true }, + { name: "project", description: "Project name", default: "unknown" }, + { name: "language", description: "Programming language", default: "TypeScript" }, + { name: "rules", description: "Additional rules", default: "" }, + ], + sections: [ + { + name: "header", + content: "# Task: {{task}}\n\nProject: {{project}}\nLanguage: {{language}}", + priority: 0, + }, + { + name: "rules", + content: "\n## Rules\n\n{{rules}}", + condition: "rules", + priority: 10, + }, + { + name: "instructions", + content: + "\n## Instructions\n\nPlease complete the above task following all specified rules.", + priority: 100, + }, + ], + outputFormat: "markdown", + }, + { + name: "code-review", + description: "Template for code review tasks", + version: "1.0.0", + variables: [ + { name: "file", description: "File to review", required: true }, + { name: "content", description: "File content", required: true }, + { name: "focus", description: "Review focus areas", default: "bugs,security,performance" }, + ], + sections: [ + { + name: "header", + content: "# Code Review: {{file}}\n\nFocus areas: {{focus}}", + priority: 0, + }, + { + name: "code", + content: "\n## Code\n\n```\n{{content}}\n```", + priority: 10, + }, + { + name: "instructions", + content: + "\n## Instructions\n\nReview the code above focusing on the specified areas. Provide specific, actionable feedback.", + priority: 100, + }, + ], + outputFormat: "markdown", + }, +]; + +// Register built-in templates on module load +for (const template of builtInTemplates) { + globalTemplateEngine.register(template); +} + +// TemplateEngine is already exported above diff --git a/cli/src/utils/transform.ts b/cli/src/utils/transform.ts new file mode 100644 index 00000000..33c6e4d2 --- /dev/null +++ b/cli/src/utils/transform.ts @@ -0,0 +1,405 @@ +/** + * Data Transformation Layer for Ralphy CLI + * + * Separates prompt building from execution logic with: + * - Transform pipelines + * - Data sanitization + * - Context enrichment + * - Format conversion + */ + +import type { Task } from "../tasks/types.ts"; +import { logDebugContext, logErrorContext } from "../ui/logger.ts"; + +/** + * Context data for transformation + */ +export interface TransformContext { + task: Task; + workDir: string; + engine: string; + config?: Record; + [key: string]: unknown; +} + +/** + * Transformation result + */ +export interface TransformResult { + /** Transformed data */ + data: string; + /** Metadata about the transformation */ + metadata: { + transformer: string; + inputLength: number; + outputLength: number; + processingTimeMs: number; + }; +} + +/** + * Transformer function type + */ +export type Transformer = (input: string, context: TransformContext) => string | Promise; + +/** + * Transformer registration + */ +interface RegisteredTransformer { + name: string; + transformer: Transformer; + priority: number; +} + +/** + * Data transformation pipeline + */ +export class TransformPipeline { + private transformers: RegisteredTransformer[] = []; + private enabled = true; + + /** + * Register a transformer + */ + register(name: string, transformer: Transformer, options?: { priority?: number }): () => void { + const entry: RegisteredTransformer = { + name, + transformer, + priority: options?.priority ?? 0, + }; + + this.transformers.push(entry); + + // Sort by priority (lower first) + this.transformers.sort((a, b) => a.priority - b.priority); + + logDebugContext( + "TransformPipeline", + `Registered transformer: ${name} (priority: ${entry.priority})`, + ); + + // Return unregister function + return () => { + const index = this.transformers.indexOf(entry); + if (index !== -1) { + this.transformers.splice(index, 1); + logDebugContext("TransformPipeline", `Unregistered transformer: ${name}`); + } + }; + } + + /** + * Execute the transformation pipeline + */ + async execute(input: string, context: TransformContext): Promise { + // Lazy-register built-in transformers to avoid circular dependency + registerBuiltInTransformers(this); + + if (!this.enabled || this.transformers.length === 0) { + return { + data: input, + metadata: { + transformer: "passthrough", + inputLength: input.length, + outputLength: input.length, + processingTimeMs: 0, + }, + }; + } + + const startTime = Date.now(); + let result = input; + const appliedTransformers: string[] = []; + + for (const entry of this.transformers) { + try { + result = await entry.transformer(result, context); + appliedTransformers.push(entry.name); + } catch (error) { + logErrorContext("TransformPipeline", `Transformer ${entry.name} failed: ${error}`); + // Continue with other transformers + } + } + + const processingTimeMs = Date.now() - startTime; + + logDebugContext( + "TransformPipeline", + `Applied ${appliedTransformers.length} transformers in ${processingTimeMs}ms`, + ); + + return { + data: result, + metadata: { + transformer: appliedTransformers.join(","), + inputLength: input.length, + outputLength: result.length, + processingTimeMs, + }, + }; + } + + /** + * Enable/disable the pipeline + */ + setEnabled(enabled: boolean): void { + this.enabled = enabled; + logDebugContext("TransformPipeline", `Pipeline ${enabled ? "enabled" : "disabled"}`); + } + + /** + * Clear all transformers + */ + clear(): void { + this.transformers = []; + logDebugContext("TransformPipeline", "All transformers cleared"); + } + + /** + * Get registered transformer names + */ + getTransformerNames(): string[] { + return this.transformers.map((t) => t.name); + } +} + +/** + * Global transform pipeline instance + */ +let globalTransformPipeline: TransformPipeline = new TransformPipeline(); + +/** + * Set global transform pipeline + */ +export function setTransformPipeline(pipeline: TransformPipeline): void { + globalTransformPipeline = pipeline; +} + +/** + * Get global transform pipeline + */ +export function getTransformPipeline(): TransformPipeline { + return globalTransformPipeline; +} + +/** + * Register a transformer (convenience function) + */ +export function registerTransformer( + name: string, + transformer: Transformer, + options?: { priority?: number }, +): () => void { + return globalTransformPipeline.register(name, transformer, options); +} + +/** + * Execute transformation pipeline (convenience function) + */ +export async function transform( + input: string, + context: TransformContext, +): Promise { + return globalTransformPipeline.execute(input, context); +} + +// ============== Built-in Transformers ============== + +/** + * Maximum input length for secret sanitization to prevent ReDoS + */ +const MAX_SANITIZE_INPUT_LENGTH = 1000000; // 1MB + +function truncateToMaxBytes(input: string, maxBytes: number): string { + if (Buffer.byteLength(input, "utf8") <= maxBytes) { + return input; + } + + let end = Math.min(input.length, maxBytes); + while (end > 0 && Buffer.byteLength(input.slice(0, end), "utf8") > maxBytes) { + end--; + } + + return input.slice(0, end); +} + +/** + * Sanitize sensitive data (API keys, passwords, etc.) + * + * SECURITY NOTE: This function includes protections against ReDoS attacks: + * - Input length is limited to MAX_SANITIZE_INPUT_LENGTH + * - All regex patterns use bounded quantifiers (e.g., {48}, {36}) + * - Patterns are applied sequentially with early exit if input becomes too large + */ +export const sanitizeSecretsTransformer: Transformer = (input) => { + // All patterns use bounded quantifiers to prevent ReDoS + // Patterns are designed to match specific token formats with fixed lengths + const patterns = [ + { regex: /sk-[a-zA-Z0-9]{48}/g, replacement: "[API_KEY_REDACTED]" }, + { regex: /sk-ant-[a-zA-Z0-9_-]{16,256}/g, replacement: "[ANTHROPIC_KEY_REDACTED]" }, + { regex: /ghp_[a-zA-Z0-9]{36}/g, replacement: "[GITHUB_TOKEN_REDACTED]" }, + { regex: /gho_[a-zA-Z0-9]{52}/g, replacement: "[GITHUB_OAUTH_REDACTED]" }, + { regex: /AKIA[0-9A-Z]{16}/g, replacement: "[AWS_KEY_REDACTED]" }, + // For hex secrets, use a bounded length and require word boundaries to prevent + // matching large hex strings that could cause performance issues + { regex: /\b[0-9a-f]{64}\b/g, replacement: "[HEX_SECRET_REDACTED]" }, + ]; + + const exceedsMaxBytes = Buffer.byteLength(input, "utf8") > MAX_SANITIZE_INPUT_LENGTH; + const source = exceedsMaxBytes ? truncateToMaxBytes(input, MAX_SANITIZE_INPUT_LENGTH) : input; + + let result = source; + for (const { regex, replacement } of patterns) { + result = result.replace(regex, replacement); + } + + if (exceedsMaxBytes) { + return `${result}\n\n[WARNING: Content truncated due to size limits during secret sanitization]`; + } + + return result; +}; + +/** + * Truncate long content to fit within token limits + */ +export const truncateContentTransformer: Transformer = (input, context) => { + const maxLength = (context.config?.maxPromptLength as number) || 50000; + + if (input.length <= maxLength) { + return input; + } + + // Smart truncation: try to break at a reasonable point + const truncationPoint = input.lastIndexOf("\n\n", maxLength); + const breakPoint = truncationPoint > maxLength * 0.8 ? truncationPoint : maxLength; + + return `${input.substring(0, breakPoint)}\n\n[Content truncated: ${input.length - breakPoint} characters omitted]`; +}; + +/** + * Add metadata header to prompts + */ +export const addMetadataHeaderTransformer: Transformer = (input, context) => { + const timestamp = new Date().toISOString(); + const safeTaskTitle = context.task.title.replace(/-->/g, "--\\>"); + const header = ` + +`; + return header + input; +}; + +/** + * Normalize line endings + */ +export const normalizeLineEndingsTransformer: Transformer = (input) => { + return input.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); +}; + +/** + * Remove excessive blank lines + */ +export const removeExcessiveWhitespaceTransformer: Transformer = (input) => { + return input.replace(/\n{4,}/g, "\n\n\n"); +}; + +/** + * Format code blocks consistently + */ +export const formatCodeBlocksTransformer: Transformer = (input) => { + // Ensure code blocks have language specifiers where detectable + return input.replace(/```\n([\s\S]*?)```/g, (match, code) => { + // Try to detect language from content + let language = ""; + if ( + code.includes("export ") || + code.includes("interface ") || + code.includes("type ") || + code.includes("function ") || + code.includes("const ") || + code.includes("let ") + ) + language = "typescript"; + else if (code.includes("import java.") || code.includes("package ")) language = "java"; + else if (code.includes("def ") || code.includes("from ")) language = "python"; + + if (language) { + return `\`\`\`${language}\n${code}\`\`\``; + } + return match; + }); +}; + +/** + * Strip HTML tags (for plain text output) + */ +export const stripHtmlTagsTransformer: Transformer = (input) => { + return input.replace(/<[^>]*>/g, ""); +}; + +/** + * Enforce token limit estimate (rough approximation) + */ +export const enforceTokenLimitTransformer: Transformer = (input, context) => { + const maxTokens = (context.config?.maxTokens as number) || 8000; + // Rough estimate: 1 token ≈ 4 characters for English text + const estimatedTokens = input.length / 4; + + if (estimatedTokens <= maxTokens) { + return input; + } + + const maxChars = maxTokens * 4; + const truncationPoint = input.lastIndexOf("\n\n", maxChars * 0.9); + const breakPoint = + truncationPoint > maxChars * 0.7 ? truncationPoint : Math.floor(maxChars * 0.9); + + return `${input.substring(0, breakPoint)}\n\n[Token limit reached: ~${Math.floor(estimatedTokens)} tokens estimated, ${input.length - breakPoint} characters omitted]`; +}; + +/** + * Add context from task configuration + */ +export const addTaskContextTransformer: Transformer = (input, context) => { + // Get rules from task or config - safely access nested properties + const taskWithContext = context.task as unknown as { context?: { rules?: string[] } }; + const rules = taskWithContext.context?.rules || []; + if (rules.length === 0) { + return input; + } + + const rulesSection = `\n\n## Task Rules\n\n${rules.map((r: string) => `- ${r}`).join("\n")}`; + return input + rulesSection; +}; + +// Register built-in transformers on module load +const BUILT_IN_TRANSFORMERS = [ + { name: "sanitize-secrets", transformer: sanitizeSecretsTransformer, priority: -100 }, + { name: "normalize-line-endings", transformer: normalizeLineEndingsTransformer, priority: -50 }, + { + name: "remove-excessive-whitespace", + transformer: removeExcessiveWhitespaceTransformer, + priority: -40, + }, + { name: "format-code-blocks", transformer: formatCodeBlocksTransformer, priority: -30 }, + { name: "add-metadata-header", transformer: addMetadataHeaderTransformer, priority: 0 }, + { name: "add-task-context", transformer: addTaskContextTransformer, priority: 10 }, + { name: "truncate-content", transformer: truncateContentTransformer, priority: 90 }, + { name: "enforce-token-limit", transformer: enforceTokenLimitTransformer, priority: 100 }, +]; + +// Register built-in transformers lazily to avoid circular dependency issues +const pipelinesWithBuiltIns = new WeakSet(); +function registerBuiltInTransformers(pipeline: TransformPipeline): void { + if (pipelinesWithBuiltIns.has(pipeline)) return; + pipelinesWithBuiltIns.add(pipeline); + for (const { name, transformer, priority } of BUILT_IN_TRANSFORMERS) { + pipeline.register(name, transformer, { priority }); + } +} + +// TransformPipeline is already exported above