diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index cf16df8dcc72..1b68d0c21d8b 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -261,6 +261,8 @@ export class Task extends EventEmitter implements TaskLike { // Tool Use consecutiveMistakeCount: number = 0 consecutiveMistakeLimit: number + consecutiveMistakeGuidanceCount: number = 0 // Track how many times we've asked for guidance + maxConsecutiveMistakeGuidance: number = 3 // Maximum times to ask for guidance before aborting consecutiveMistakeCountForApplyDiff: Map = new Map() toolUsage: ToolUsage = {} @@ -1725,10 +1727,35 @@ export class Task extends EventEmitter implements TaskLike { } if (this.consecutiveMistakeLimit > 0 && this.consecutiveMistakeCount >= this.consecutiveMistakeLimit) { - const { response, text, images } = await this.ask( - "mistake_limit_reached", - t("common:errors.mistake_limit_guidance"), - ) + // Check if we've asked for guidance too many times + if (this.consecutiveMistakeGuidanceCount >= this.maxConsecutiveMistakeGuidance) { + // We've asked for guidance too many times, abort to prevent token burning + await this.say( + "error", + `I've been unable to proceed despite multiple attempts and guidance. The task appears to be stuck in a loop. To prevent excessive token usage, I'm stopping here. Please review the conversation and consider:\n\n1. Providing more specific instructions\n2. Breaking down the task into smaller steps\n3. 
Checking if there are any environmental issues preventing progress`, + ) + + // Track token burning event in telemetry + // Use captureConsecutiveMistakeError with additional logging for now + TelemetryService.instance.captureConsecutiveMistakeError(this.taskId) + console.error( + `[Task#${this.taskId}] Token burning detected - Guidance count: ${this.consecutiveMistakeGuidanceCount}, Mistake count: ${this.consecutiveMistakeCount}`, + ) + + // Return true to end the task loop + return true + } + + // Increment guidance count before asking + this.consecutiveMistakeGuidanceCount++ + + // Append an attempt counter to the message if we've asked before + let guidanceMessage = t("common:errors.mistake_limit_guidance") + if (this.consecutiveMistakeGuidanceCount > 1) { + guidanceMessage = `${guidanceMessage}\n\n(Attempt ${this.consecutiveMistakeGuidanceCount}/${this.maxConsecutiveMistakeGuidance} - I'm having difficulty making progress)` + } + + const { response, text, images } = await this.ask("mistake_limit_reached", guidanceMessage) if (response === "messageResponse") { currentUserContent.push( @@ -1740,10 +1767,11 @@ export class Task extends EventEmitter implements TaskLike { await this.say("user_feedback", text, images) - // Track consecutive mistake errors in telemetry. 
+ // Track consecutive mistake errors in telemetry TelemetryService.instance.captureConsecutiveMistakeError(this.taskId) } + // Reset mistake count but keep guidance count this.consecutiveMistakeCount = 0 } @@ -2304,6 +2332,11 @@ export class Task extends EventEmitter implements TaskLike { if (!didToolUse) { this.userMessageContent.push({ type: "text", text: formatResponse.noToolsUsed() }) this.consecutiveMistakeCount++ + + // Log when we're incrementing mistake count for debugging + console.log( + `[Task#${this.taskId}] Consecutive mistake count: ${this.consecutiveMistakeCount}/${this.consecutiveMistakeLimit}, Guidance count: ${this.consecutiveMistakeGuidanceCount}/${this.maxConsecutiveMistakeGuidance}`, + ) } if (this.userMessageContent.length > 0) { @@ -2932,4 +2965,14 @@ export class Task extends EventEmitter implements TaskLike { console.error(`[Task] Queue processing error:`, e) } } + + /** + * Reset consecutive mistake tracking counters. + * Tools call this once an invocation gets past their early-return checks, to indicate + * that Roo is making progress and not stuck in a loop. 
+ */ + public resetConsecutiveMistakeCounts(): void { + this.consecutiveMistakeCount = 0 + this.consecutiveMistakeGuidanceCount = 0 + } } diff --git a/src/core/task/__tests__/Task.tokenBurning.spec.ts b/src/core/task/__tests__/Task.tokenBurning.spec.ts new file mode 100644 index 000000000000..ceada1c19835 --- /dev/null +++ b/src/core/task/__tests__/Task.tokenBurning.spec.ts @@ -0,0 +1,276 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest" +import { Task } from "../Task" +import { ClineProvider } from "../../webview/ClineProvider" +import { ApiHandler } from "../../../api" +import { TelemetryService } from "@roo-code/telemetry" +import { DEFAULT_CONSECUTIVE_MISTAKE_LIMIT } from "@roo-code/types" + +// Mock TelemetryService before any imports that might use it +vi.mock("@roo-code/telemetry", () => { + const mockTelemetryService = { + captureConsecutiveMistakeError: vi.fn(), + captureEvent: vi.fn(), + captureTaskCreated: vi.fn(), + captureTaskRestarted: vi.fn(), + captureConversationMessage: vi.fn(), + } + + return { + TelemetryService: { + instance: mockTelemetryService, + initialize: vi.fn(), + }, + } +}) + +describe("Task - Token Burning Prevention", () => { + let mockProvider: any + let mockApiHandler: any + let task: Task + + beforeEach(() => { + // Mock provider + mockProvider = { + context: { + globalStorageUri: { fsPath: "/test/storage" }, + extensionUri: { fsPath: "/test/extension" }, + }, + getState: vi.fn().mockResolvedValue({ + mode: "code", + apiConfiguration: { + apiProvider: "anthropic", + apiKey: "test-key", + }, + }), + postStateToWebview: vi.fn(), + postMessageToWebview: vi.fn(), + updateTaskHistory: vi.fn(), + log: vi.fn(), + } as any + + // Mock API handler + mockApiHandler = { + getModel: vi.fn().mockReturnValue({ + id: "claude-3-opus", + info: { + contextWindow: 200000, + supportsComputerUse: false, + }, + }), + createMessage: vi.fn(), + } as any + }) + + afterEach(() => { + vi.clearAllMocks() + }) + + 
describe("Consecutive Mistake Guidance Limit", () => { + it("should initialize with default values", () => { + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + task: "test task", + }) + + expect(task.consecutiveMistakeCount).toBe(0) + expect(task.consecutiveMistakeLimit).toBe(DEFAULT_CONSECUTIVE_MISTAKE_LIMIT) + expect(task.consecutiveMistakeGuidanceCount).toBe(0) + expect(task.maxConsecutiveMistakeGuidance).toBe(3) + }) + + it("should reset both counters when resetConsecutiveMistakeCounts is called", () => { + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + task: "test task", + }) + + // Set some values + task.consecutiveMistakeCount = 5 + task.consecutiveMistakeGuidanceCount = 2 + + // Reset + task.resetConsecutiveMistakeCounts() + + // Both should be reset + expect(task.consecutiveMistakeCount).toBe(0) + expect(task.consecutiveMistakeGuidanceCount).toBe(0) + }) + + it("should increment guidance count when asking for user guidance", async () => { + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + consecutiveMistakeLimit: 3, + task: "test task", + }) + + // Mock the ask method to simulate user providing feedback + task.ask = vi.fn().mockResolvedValue({ + response: "messageResponse", + text: "Try a different approach", + images: undefined, + }) + + // Mock the say method + task.say = vi.fn().mockResolvedValue(undefined) + + // Set mistake count to trigger guidance request + task.consecutiveMistakeCount = 3 + task.consecutiveMistakeGuidanceCount = 0 + + // Create a mock recursivelyMakeClineRequests that simulates the guidance flow + const mockUserContent: any[] = [] + const stack = [{ userContent: mockUserContent, includeFileDetails: false }] + + // Simulate the part of recursivelyMakeClineRequests that handles mistakes + if 
(task.consecutiveMistakeLimit > 0 && task.consecutiveMistakeCount >= task.consecutiveMistakeLimit) { + if (task.consecutiveMistakeGuidanceCount >= task.maxConsecutiveMistakeGuidance) { + // Should not reach here in this test + expect(true).toBe(false) + } + + task.consecutiveMistakeGuidanceCount++ + + const guidanceMessage = + task.consecutiveMistakeGuidanceCount > 1 + ? `I've been making too many mistakes. Could you provide some guidance or corrections to help me proceed?\n\n(Attempt ${task.consecutiveMistakeGuidanceCount}/${task.maxConsecutiveMistakeGuidance} - I'm having difficulty making progress)` + : "I've been making too many mistakes. Could you provide some guidance or corrections to help me proceed?" + + await task.ask("mistake_limit_reached", guidanceMessage) + task.consecutiveMistakeCount = 0 + } + + expect(task.consecutiveMistakeGuidanceCount).toBe(1) + expect(task.consecutiveMistakeCount).toBe(0) + }) + + it("should abort task when guidance limit is exceeded", async () => { + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + consecutiveMistakeLimit: 3, + task: "test task", + }) + + // Mock the say method + task.say = vi.fn().mockResolvedValue(undefined) + + // Set counters to exceed limit + task.consecutiveMistakeCount = 3 + task.consecutiveMistakeGuidanceCount = 3 // Already at max + + // Simulate the check in recursivelyMakeClineRequests + let shouldAbort = false + if (task.consecutiveMistakeLimit > 0 && task.consecutiveMistakeCount >= task.consecutiveMistakeLimit) { + if (task.consecutiveMistakeGuidanceCount >= task.maxConsecutiveMistakeGuidance) { + await task.say( + "error", + `I've been unable to proceed despite multiple attempts and guidance. The task appears to be stuck in a loop. To prevent excessive token usage, I'm stopping here. Please review the conversation and consider:\n\n1. Providing more specific instructions\n2. 
Breaking down the task into smaller steps\n3. Checking if there are any environmental issues preventing progress`, + ) + + // In the real code, this would capture telemetry + TelemetryService.instance.captureConsecutiveMistakeError(task.taskId) + + shouldAbort = true + } + } + + expect(shouldAbort).toBe(true) + expect(task.say).toHaveBeenCalledWith( + "error", + expect.stringContaining("unable to proceed despite multiple attempts"), + ) + expect(TelemetryService.instance.captureConsecutiveMistakeError).toHaveBeenCalledWith(task.taskId) + }) + + it("should show attempt count in guidance message after first attempt", async () => { + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + consecutiveMistakeLimit: 3, + task: "test task", + }) + + // Mock the ask method + task.ask = vi.fn().mockResolvedValue({ + response: "messageResponse", + text: "Try again", + images: undefined, + }) + + // Set guidance count to simulate second attempt + task.consecutiveMistakeGuidanceCount = 1 + task.consecutiveMistakeCount = 3 + + // Simulate generating the guidance message + let guidanceMessage = + "I've been making too many mistakes. Could you provide some guidance or corrections to help me proceed?" 
+ if (task.consecutiveMistakeGuidanceCount > 0) { + task.consecutiveMistakeGuidanceCount++ // Increment before showing + guidanceMessage = `${guidanceMessage}\n\n(Attempt ${task.consecutiveMistakeGuidanceCount}/${task.maxConsecutiveMistakeGuidance} - I'm having difficulty making progress)` + } else { + task.consecutiveMistakeGuidanceCount++ + } + + await task.ask("mistake_limit_reached", guidanceMessage) + + expect(task.ask).toHaveBeenCalledWith( + "mistake_limit_reached", + expect.stringContaining("(Attempt 2/3 - I'm having difficulty making progress)"), + ) + }) + + it("should log debug information when incrementing mistake count", () => { + const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {}) + + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + consecutiveMistakeLimit: 3, + task: "test task", + }) + + // Simulate incrementing mistake count with logging + task.consecutiveMistakeCount++ + console.log( + `[Task#${task.taskId}] Consecutive mistake count: ${task.consecutiveMistakeCount}/${task.consecutiveMistakeLimit}, Guidance count: ${task.consecutiveMistakeGuidanceCount}/${task.maxConsecutiveMistakeGuidance}`, + ) + + expect(consoleSpy).toHaveBeenCalledWith( + expect.stringContaining("Consecutive mistake count: 1/3, Guidance count: 0/3"), + ) + + consoleSpy.mockRestore() + }) + }) + + describe("Token Burning Detection", () => { + it("should log error when token burning is detected", async () => { + const consoleErrorSpy = vi.spyOn(console, "error").mockImplementation(() => {}) + + task = new Task({ + provider: mockProvider as ClineProvider, + apiConfiguration: { apiProvider: "anthropic", apiKey: "test" }, + task: "test task", + }) + + // Set counters to trigger token burning detection + task.consecutiveMistakeGuidanceCount = 3 + task.consecutiveMistakeCount = 5 + + // Simulate token burning detection + 
TelemetryService.instance.captureConsecutiveMistakeError(task.taskId) + console.error( + `[Task#${task.taskId}] Token burning detected - Guidance count: ${task.consecutiveMistakeGuidanceCount}, Mistake count: ${task.consecutiveMistakeCount}`, + ) + + expect(consoleErrorSpy).toHaveBeenCalledWith( + expect.stringContaining("Token burning detected - Guidance count: 3, Mistake count: 5"), + ) + + consoleErrorSpy.mockRestore() + }) + }) +}) diff --git a/src/core/tools/executeCommandTool.ts b/src/core/tools/executeCommandTool.ts index 2c7ce0d023e2..89224a984f9c 100644 --- a/src/core/tools/executeCommandTool.ts +++ b/src/core/tools/executeCommandTool.ts @@ -51,7 +51,7 @@ export async function executeCommandTool( return } - task.consecutiveMistakeCount = 0 + task.resetConsecutiveMistakeCounts() command = unescapeHtmlEntities(command) // Unescape HTML entities. const didApprove = await askApproval("command", command) diff --git a/src/core/tools/writeToFileTool.ts b/src/core/tools/writeToFileTool.ts index 5abd96a20aff..c5558ad21edd 100644 --- a/src/core/tools/writeToFileTool.ts +++ b/src/core/tools/writeToFileTool.ts @@ -159,7 +159,7 @@ export async function writeToFileTool( return } - cline.consecutiveMistakeCount = 0 + cline.resetConsecutiveMistakeCounts() // Check if preventFocusDisruption experiment is enabled const provider = cline.providerRef.deref()