feat(scout-agent): add automatic compaction when token threshold exceeded

blink-so[bot] · blink-so[bot] · commit 3da054d2014b · 2025-12-16T11:05:05.000Z
This commit adds:
- New compaction option in BuildStreamTextParamsOptions to configure:
  - warningThreshold: token count that triggers compaction warning (default: 80% of max)
  - maxTokenThreshold: maximum tokens for context (default: 100k)
  - modelName: model name for token counting
  - Set compaction to false to disable token counting and warning injection (the compaction tool stays available)
- Token counting using ai-tokenizer after message conversion
- Automatic injection of compaction warning message when threshold exceeded
- Logging for token thresholds and warning injection
- Tests for:
  - Compaction tool is included by default
  - Existing compaction summaries are applied
  - Warning message is injected when threshold exceeded
  - Compaction can be disabled with compaction: false
  - Custom thresholds are respected

Note: core.test.ts tests may not run locally due to bun/HTTPParser
incompatibility, but work in CI.
diff --git a/packages/scout-agent/lib/core.test.ts b/packages/scout-agent/lib/core.test.ts
@@ -17,7 +17,11 @@ import {
   mockCoderWorkspace,
   noopLogger,
 } from "./compute/test-utils";
-import { type Message, Scout } from "./index";
+import {
+  COMPACT_CONVERSATION_TOOL_NAME,
+  type Message,
+  Scout,
+} from "./index";
 import { createMockBlinkApiServer, withBlinkApiUrl } from "./test-helpers";
 
 // Add async iterator support to ReadableStream for testing
@@ -948,3 +952,243 @@ describe("coder integration", () => {
     expect(mockClient.getAppHost).toHaveBeenCalled();
   });
 });
+
+describe("compaction", () => {
+  test("buildStreamTextParams includes compaction tool by default", async () => {
+    const agent = new blink.Agent<Message>();
+    const scout = new Scout({
+      agent,
+      logger: noopLogger,
+    });
+
+    const params = await scout.buildStreamTextParams({
+      chatID: "test-chat-id" as blink.ID,
+      messages: [
+        {
+          id: "1",
+          role: "user",
+          parts: [{ type: "text", text: "Hello" }],
+        },
+      ],
+      model: newMockModel({ textResponse: "test" }),
+    });
+
+    // No `compaction` option is passed above, so this checks the default: the tool is registered
+    expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined();
+  });
+
+  test("buildStreamTextParams applies existing compaction summary", async () => {
+    const infoLogs: string[] = [];
+    const mockLogger = {
+      ...noopLogger,
+      info: (...args: unknown[]) => {
+        infoLogs.push(args.map(String).join(" "));
+      },
+    };
+
+    const agent = new blink.Agent<Message>();
+    const scout = new Scout({
+      agent,
+      logger: mockLogger,
+    });
+
+    // Create messages with an existing compaction summary (the tool output in message "3")
+    const messagesWithCompaction: Message[] = [
+      {
+        id: "1",
+        role: "user",
+        parts: [{ type: "text", text: "Old message 1" }],
+      },
+      {
+        id: "2",
+        role: "assistant",
+        parts: [{ type: "text", text: "Old response 1" }],
+      },
+      {
+        id: "3",
+        role: "assistant",
+        parts: [
+          {
+            type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`,
+            toolCallId: "tool-call-1",
+            state: "output-available",
+            input: { summary: "Summary of old messages" },
+            output: { summary: "Summary of old messages" },
+          } as unknown as Message["parts"][number],
+        ],
+      },
+      {
+        id: "4",
+        role: "user",
+        parts: [{ type: "text", text: "New message after compaction" }],
+      },
+    ];
+
+    const params = await scout.buildStreamTextParams({
+      chatID: "test-chat-id" as blink.ID,
+      messages: messagesWithCompaction,
+      model: newMockModel({ textResponse: "test" }),
+      // Push the warning threshold out of reach so no warning message is added to the count below
+      compaction: {
+        warningThreshold: Number.MAX_SAFE_INTEGER,
+      },
+    });
+
+    // Verify that compaction was applied (the info log reports the before/after message counts)
+    const compactionLog = infoLogs.find((l) =>
+      l.includes("Applied conversation compaction")
+    );
+    expect(compactionLog).toBeDefined();
+    expect(compactionLog).toInclude("4 messages -> 3 messages");
+
+    // Verify messages were processed: the 3 compacted messages plus the system prompt.
+    // The converted messages are expected to be: system prompt, compaction-summary user msg,
+    // the assistant msg with tool output, and the new user msg
+    expect(params.messages.length).toBe(4);
+  });
+
+  test("buildStreamTextParams injects warning when token threshold exceeded", async () => {
+    const warnLogs: string[] = [];
+    const infoLogs: string[] = [];
+    const mockLogger = {
+      ...noopLogger,
+      warn: (...args: unknown[]) => {
+        warnLogs.push(args.map(String).join(" "));
+      },
+      info: (...args: unknown[]) => {
+        infoLogs.push(args.map(String).join(" "));
+      },
+    };
+
+    const agent = new blink.Agent<Message>();
+    const scout = new Scout({
+      agent,
+      logger: mockLogger,
+    });
+
+    // Create a message that will exceed a very low threshold
+    const params = await scout.buildStreamTextParams({
+      chatID: "test-chat-id" as blink.ID,
+      messages: [
+        {
+          id: "1",
+          role: "user",
+          parts: [{ type: "text", text: "Hello world, this is a test message." }],
+        },
+      ],
+      model: newMockModel({ textResponse: "test" }),
+      compaction: {
+        // Set a very low threshold so any message exceeds it
+        warningThreshold: 1,
+        maxTokenThreshold: 100,
+      },
+    });
+
+    // Verify warning was logged
+    const warningLog = warnLogs.find((l) =>
+      l.includes("approaching context limit")
+    );
+    expect(warningLog).toBeDefined();
+
+    // Verify info log about injection
+    const injectionLog = infoLogs.find((l) =>
+      l.includes("Injected compaction warning")
+    );
+    expect(injectionLog).toBeDefined();
+
+    // Verify warning message was injected (system + user + warning = 3 messages)
+    expect(params.messages.length).toBe(3);
+
+    // Check that some user message with plain-string content carries the compaction warning text
+    const lastUserMessage = params.messages.find(
+      (m) =>
+        m.role === "user" &&
+        typeof m.content === "string" &&
+        m.content.includes("CONTEXT LIMIT WARNING")
+    );
+    expect(lastUserMessage).toBeDefined();
+  });
+
+  test("buildStreamTextParams respects compaction: false to disable", async () => {
+    const warnLogs: string[] = [];
+    const mockLogger = {
+      ...noopLogger,
+      warn: (...args: unknown[]) => {
+        warnLogs.push(args.map(String).join(" "));
+      },
+    };
+
+    const agent = new blink.Agent<Message>();
+    const scout = new Scout({
+      agent,
+      logger: mockLogger,
+    });
+
+    const params = await scout.buildStreamTextParams({
+      chatID: "test-chat-id" as blink.ID,
+      messages: [
+        {
+          id: "1",
+          role: "user",
+          parts: [{ type: "text", text: "Hello world, this is a test message." }],
+        },
+      ],
+      model: newMockModel({ textResponse: "test" }),
+      compaction: false,
+    });
+
+    // Compaction tool should still be available (for manual use)
+    expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined();
+
+    // No threshold warning should be logged when compaction is disabled
+    const warningLog = warnLogs.find((l) =>
+      l.includes("approaching context limit")
+    );
+    expect(warningLog).toBeUndefined();
+
+    // Only system + user message (no warning injected)
+    expect(params.messages.length).toBe(2);
+  });
+
+  test("buildStreamTextParams uses custom thresholds", async () => {
+    const warnLogs: string[] = [];
+    const mockLogger = {
+      ...noopLogger,
+      warn: (...args: unknown[]) => {
+        warnLogs.push(args.map(String).join(" "));
+      },
+    };
+
+    const agent = new blink.Agent<Message>();
+    const scout = new Scout({
+      agent,
+      logger: mockLogger,
+    });
+
+    // Caller-supplied thresholds set very high: a one-word conversation must not trigger a warning
+    const params = await scout.buildStreamTextParams({
+      chatID: "test-chat-id" as blink.ID,
+      messages: [
+        {
+          id: "1",
+          role: "user",
+          parts: [{ type: "text", text: "Hello" }],
+        },
+      ],
+      model: newMockModel({ textResponse: "test" }),
+      compaction: {
+        warningThreshold: 1_000_000, // Very high threshold
+        maxTokenThreshold: 2_000_000,
+      },
+    });
+
+    // No warning should be logged
+    const warningLog = warnLogs.find((l) =>
+      l.includes("approaching context limit")
+    );
+    expect(warningLog).toBeUndefined();
+
+    // Only system + user message
+    expect(params.messages.length).toBe(2);
+  });
+});
diff --git a/packages/scout-agent/lib/core.ts b/packages/scout-agent/lib/core.ts
@@ -29,7 +29,13 @@ import {
   githubAppContextFactory,
   handleGitHubWebhook,
 } from "./github";
-import { applyCompaction, createCompactionTool } from "./compaction";
+import {
+  applyCompaction,
+  countConversationTokens,
+  createCompactionTool,
+  createCompactionWarningMessage,
+  DEFAULT_TOKEN_THRESHOLD,
+} from "./compaction";
 import { defaultSystemPrompt } from "./prompt";
 import { createSlackApp, createSlackTools, getSlackMetadata } from "./slack";
 import type { Message } from "./types";
@@ -55,6 +61,33 @@ export interface BuildStreamTextParamsOptions {
    * If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config.
    */
   getGithubAppContext?: () => Promise<github.AppAuthOptions | undefined>;
+  /**
+   * Configuration for conversation compaction.
+   * If not provided, compaction features are enabled with default thresholds.
+   * Set to `false` to disable token counting and warning injection only; the compaction tool and existing summaries still apply.
+   */
+  compaction?:
+    | {
+        /**
+         * Token threshold at which to show a compaction warning.
+         * When the conversation reaches or exceeds this threshold, a warning message
+         * is injected asking the model to call the compact_conversation tool.
+         * Default: 80% of maxTokenThreshold (80,000 tokens with the default maximum)
+         */
+        warningThreshold?: number;
+        /**
+         * Maximum token threshold for the conversation.
+         * Passed to the warning message builder and used to derive the default warningThreshold.
+         * Default: 100,000 tokens
+         */
+        maxTokenThreshold?: number;
+        /**
+         * Model name used for token counting.
+         * Default: derived from the model parameter or "anthropic/claude-sonnet-4"
+         */
+        modelName?: string;
+      }
+    | false;
 }
 
 interface Logger {
@@ -327,6 +360,7 @@ export class Scout {
     tools: providedTools,
     getGithubAppContext,
     systemPrompt = defaultSystemPrompt,
+    compaction: compactionConfig,
   }: BuildStreamTextParamsOptions): Promise<{
     model: LanguageModel;
     messages: ModelMessage[];
@@ -347,15 +381,63 @@ export class Scout {
         )()
       : undefined;
 
+    // Determine if compaction is enabled and get config values
+    const compactionEnabled = compactionConfig !== false;
+    const maxTokenThreshold =
+      (compactionConfig !== false && compactionConfig?.maxTokenThreshold) ||
+      DEFAULT_TOKEN_THRESHOLD;
+    const warningThreshold =
+      (compactionConfig !== false && compactionConfig?.warningThreshold) ||
+      Math.floor(maxTokenThreshold * 0.8);
+    const compactionModelName =
+      (compactionConfig !== false && compactionConfig?.modelName) ||
+      (typeof model === "object" && "modelId" in model
+        ? model.modelId
+        : typeof model === "string"
+          ? model
+          : "anthropic/claude-sonnet-4");
+
     // Apply compaction if a compaction summary exists in the message history
-    const compactedMessages = applyCompaction(messages);
+    let compactedMessages = applyCompaction(messages);
     const wasCompacted = compactedMessages.length !== messages.length;
     if (wasCompacted) {
       this.logger.info(
         `Applied conversation compaction: ${messages.length} messages -> ${compactedMessages.length} messages`
       );
     }
 
+    // Check token count and inject warning message if needed
+    let tokenCount: number | undefined;
+    let compactionWarningInjected = false;
+    if (compactionEnabled && compactedMessages.length > 0) {
+      // We need to convert messages to count tokens accurately
+      // For now, use a temporary conversion to count
+      const tempConverted = convertToModelMessages(compactedMessages, {
+        ignoreIncompleteToolCalls: true,
+      });
+      tokenCount = await countConversationTokens(
+        tempConverted,
+        compactionModelName
+      );
+
+      if (tokenCount >= warningThreshold) {
+        this.logger.warn(
+          `Conversation approaching context limit: ${tokenCount.toLocaleString()} tokens (threshold: ${warningThreshold.toLocaleString()})`
+        );
+
+        // Inject a compaction warning message at the end of the conversation
+        const warningMessage = createCompactionWarningMessage(
+          tokenCount,
+          maxTokenThreshold
+        );
+        compactedMessages = [...compactedMessages, warningMessage];
+        compactionWarningInjected = true;
+        this.logger.info(
+          "Injected compaction warning message to prompt model to compact conversation"
+        );
+      }
+    }
+
     const slackMetadata = getSlackMetadata(compactedMessages);
     const respondingInSlack =
       this.slack.app !== undefined && slackMetadata !== undefined;