Skip to content

Commit 3da054d

Browse files
committed
feat(scout-agent): add automatic compaction when token threshold exceeded
This commit adds:
- New compaction option in BuildStreamTextParamsOptions to configure:
  - warningThreshold: token count that triggers the compaction warning (default: 80% of max)
  - maxTokenThreshold: maximum tokens for context (default: 100k)
  - modelName: model name for token counting
  - Set to false to disable compaction features
- Token counting using ai-tokenizer after message conversion
- Automatic injection of a compaction warning message when the threshold is exceeded
- Logging for token thresholds and warning injection
- Tests for:
  - Compaction tool is included by default
  - Existing compaction summaries are applied
  - Warning message is injected when the threshold is exceeded
  - Compaction can be disabled with compaction: false
  - Custom thresholds are respected

Note: core.test.ts tests may not run locally due to a bun/HTTPParser incompatibility, but they work in CI.
1 parent f14c7cb commit 3da054d

File tree

2 files changed

+329
-3
lines changed

2 files changed

+329
-3
lines changed

packages/scout-agent/lib/core.test.ts

Lines changed: 245 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ import {
1717
mockCoderWorkspace,
1818
noopLogger,
1919
} from "./compute/test-utils";
20-
import { type Message, Scout } from "./index";
20+
import {
21+
COMPACT_CONVERSATION_TOOL_NAME,
22+
type Message,
23+
Scout,
24+
} from "./index";
2125
import { createMockBlinkApiServer, withBlinkApiUrl } from "./test-helpers";
2226

2327
// Add async iterator support to ReadableStream for testing
@@ -948,3 +952,243 @@ describe("coder integration", () => {
948952
expect(mockClient.getAppHost).toHaveBeenCalled();
949953
});
950954
});
955+
956+
describe("compaction", () => {
957+
test("buildStreamTextParams includes compaction tool by default", async () => {
958+
const agent = new blink.Agent<Message>();
959+
const scout = new Scout({
960+
agent,
961+
logger: noopLogger,
962+
});
963+
964+
const params = await scout.buildStreamTextParams({
965+
chatID: "test-chat-id" as blink.ID,
966+
messages: [
967+
{
968+
id: "1",
969+
role: "user",
970+
parts: [{ type: "text", text: "Hello" }],
971+
},
972+
],
973+
model: newMockModel({ textResponse: "test" }),
974+
});
975+
976+
// Verify compaction tool is included
977+
expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined();
978+
});
979+
980+
test("buildStreamTextParams applies existing compaction summary", async () => {
981+
const infoLogs: string[] = [];
982+
const mockLogger = {
983+
...noopLogger,
984+
info: (...args: unknown[]) => {
985+
infoLogs.push(args.map(String).join(" "));
986+
},
987+
};
988+
989+
const agent = new blink.Agent<Message>();
990+
const scout = new Scout({
991+
agent,
992+
logger: mockLogger,
993+
});
994+
995+
// Create messages with an existing compaction summary
996+
const messagesWithCompaction: Message[] = [
997+
{
998+
id: "1",
999+
role: "user",
1000+
parts: [{ type: "text", text: "Old message 1" }],
1001+
},
1002+
{
1003+
id: "2",
1004+
role: "assistant",
1005+
parts: [{ type: "text", text: "Old response 1" }],
1006+
},
1007+
{
1008+
id: "3",
1009+
role: "assistant",
1010+
parts: [
1011+
{
1012+
type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`,
1013+
toolCallId: "tool-call-1",
1014+
state: "output-available",
1015+
input: { summary: "Summary of old messages" },
1016+
output: { summary: "Summary of old messages" },
1017+
} as unknown as Message["parts"][number],
1018+
],
1019+
},
1020+
{
1021+
id: "4",
1022+
role: "user",
1023+
parts: [{ type: "text", text: "New message after compaction" }],
1024+
},
1025+
];
1026+
1027+
const params = await scout.buildStreamTextParams({
1028+
chatID: "test-chat-id" as blink.ID,
1029+
messages: messagesWithCompaction,
1030+
model: newMockModel({ textResponse: "test" }),
1031+
// Disable warning threshold to avoid token counting affecting message count
1032+
compaction: {
1033+
warningThreshold: Number.MAX_SAFE_INTEGER,
1034+
},
1035+
});
1036+
1037+
// Verify that compaction was applied (log message)
1038+
const compactionLog = infoLogs.find((l) =>
1039+
l.includes("Applied conversation compaction")
1040+
);
1041+
expect(compactionLog).toBeDefined();
1042+
expect(compactionLog).toInclude("4 messages -> 3 messages");
1043+
1044+
// Verify messages were processed: should have system + summary + compaction msg + new msg
1045+
// The converted messages include: system prompt, compaction-summary user msg,
1046+
// the assistant msg with tool output, and the new user msg
1047+
expect(params.messages.length).toBe(4);
1048+
});
1049+
1050+
test("buildStreamTextParams injects warning when token threshold exceeded", async () => {
1051+
const warnLogs: string[] = [];
1052+
const infoLogs: string[] = [];
1053+
const mockLogger = {
1054+
...noopLogger,
1055+
warn: (...args: unknown[]) => {
1056+
warnLogs.push(args.map(String).join(" "));
1057+
},
1058+
info: (...args: unknown[]) => {
1059+
infoLogs.push(args.map(String).join(" "));
1060+
},
1061+
};
1062+
1063+
const agent = new blink.Agent<Message>();
1064+
const scout = new Scout({
1065+
agent,
1066+
logger: mockLogger,
1067+
});
1068+
1069+
// Create a message that will exceed a very low threshold
1070+
const params = await scout.buildStreamTextParams({
1071+
chatID: "test-chat-id" as blink.ID,
1072+
messages: [
1073+
{
1074+
id: "1",
1075+
role: "user",
1076+
parts: [{ type: "text", text: "Hello world, this is a test message." }],
1077+
},
1078+
],
1079+
model: newMockModel({ textResponse: "test" }),
1080+
compaction: {
1081+
// Set a very low threshold so any message exceeds it
1082+
warningThreshold: 1,
1083+
maxTokenThreshold: 100,
1084+
},
1085+
});
1086+
1087+
// Verify warning was logged
1088+
const warningLog = warnLogs.find((l) =>
1089+
l.includes("approaching context limit")
1090+
);
1091+
expect(warningLog).toBeDefined();
1092+
1093+
// Verify info log about injection
1094+
const injectionLog = infoLogs.find((l) =>
1095+
l.includes("Injected compaction warning")
1096+
);
1097+
expect(injectionLog).toBeDefined();
1098+
1099+
// Verify warning message was injected (system + user + warning = 3 messages)
1100+
expect(params.messages.length).toBe(3);
1101+
1102+
// Check that a user message containing the compaction warning text is present
1103+
const lastUserMessage = params.messages.find(
1104+
(m) =>
1105+
m.role === "user" &&
1106+
typeof m.content === "string" &&
1107+
m.content.includes("CONTEXT LIMIT WARNING")
1108+
);
1109+
expect(lastUserMessage).toBeDefined();
1110+
});
1111+
1112+
test("buildStreamTextParams respects compaction: false to disable", async () => {
1113+
const warnLogs: string[] = [];
1114+
const mockLogger = {
1115+
...noopLogger,
1116+
warn: (...args: unknown[]) => {
1117+
warnLogs.push(args.map(String).join(" "));
1118+
},
1119+
};
1120+
1121+
const agent = new blink.Agent<Message>();
1122+
const scout = new Scout({
1123+
agent,
1124+
logger: mockLogger,
1125+
});
1126+
1127+
const params = await scout.buildStreamTextParams({
1128+
chatID: "test-chat-id" as blink.ID,
1129+
messages: [
1130+
{
1131+
id: "1",
1132+
role: "user",
1133+
parts: [{ type: "text", text: "Hello world, this is a test message." }],
1134+
},
1135+
],
1136+
model: newMockModel({ textResponse: "test" }),
1137+
compaction: false,
1138+
});
1139+
1140+
// Compaction tool should still be available (for manual use)
1141+
expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined();
1142+
1143+
// No warning should be logged even with messages
1144+
const warningLog = warnLogs.find((l) =>
1145+
l.includes("approaching context limit")
1146+
);
1147+
expect(warningLog).toBeUndefined();
1148+
1149+
// Only system + user message (no warning injected)
1150+
expect(params.messages.length).toBe(2);
1151+
});
1152+
1153+
test("buildStreamTextParams uses custom thresholds", async () => {
1154+
const warnLogs: string[] = [];
1155+
const mockLogger = {
1156+
...noopLogger,
1157+
warn: (...args: unknown[]) => {
1158+
warnLogs.push(args.map(String).join(" "));
1159+
},
1160+
};
1161+
1162+
const agent = new blink.Agent<Message>();
1163+
const scout = new Scout({
1164+
agent,
1165+
logger: mockLogger,
1166+
});
1167+
1168+
// With a very high threshold, no warning should be injected
1169+
const params = await scout.buildStreamTextParams({
1170+
chatID: "test-chat-id" as blink.ID,
1171+
messages: [
1172+
{
1173+
id: "1",
1174+
role: "user",
1175+
parts: [{ type: "text", text: "Hello" }],
1176+
},
1177+
],
1178+
model: newMockModel({ textResponse: "test" }),
1179+
compaction: {
1180+
warningThreshold: 1_000_000, // Very high threshold
1181+
maxTokenThreshold: 2_000_000,
1182+
},
1183+
});
1184+
1185+
// No warning should be logged
1186+
const warningLog = warnLogs.find((l) =>
1187+
l.includes("approaching context limit")
1188+
);
1189+
expect(warningLog).toBeUndefined();
1190+
1191+
// Only system + user message
1192+
expect(params.messages.length).toBe(2);
1193+
});
1194+
});

packages/scout-agent/lib/core.ts

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ import {
2929
githubAppContextFactory,
3030
handleGitHubWebhook,
3131
} from "./github";
32-
import { applyCompaction, createCompactionTool } from "./compaction";
32+
import {
33+
applyCompaction,
34+
countConversationTokens,
35+
createCompactionTool,
36+
createCompactionWarningMessage,
37+
DEFAULT_TOKEN_THRESHOLD,
38+
} from "./compaction";
3339
import { defaultSystemPrompt } from "./prompt";
3440
import { createSlackApp, createSlackTools, getSlackMetadata } from "./slack";
3541
import type { Message } from "./types";
@@ -55,6 +61,33 @@ export interface BuildStreamTextParamsOptions {
5561
* If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config.
5662
*/
5763
getGithubAppContext?: () => Promise<github.AppAuthOptions | undefined>;
64+
/**
65+
* Configuration for conversation compaction.
66+
* If not provided, compaction features are enabled with default thresholds.
67+
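* Set to `false` to disable token counting and automatic warning injection.
* The compaction tool stays available and existing compaction summaries are still applied.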
68+
*/
69+
compaction?:
70+
| {
71+
/**
72+
* Token threshold at which to show a compaction warning.
73+
* When the conversation exceeds this threshold, a warning message
74+
* is injected asking the model to call the compact_conversation tool.
75+
* Default: 80% of maxTokenThreshold (80,000 tokens with the default maximum)
76+
*/
77+
warningThreshold?: number;
78+
/**
79+
* Maximum token threshold for the conversation.
80+
* Used to calculate the percentage in the warning message and the default warningThreshold.
81+
* Default: 100,000 tokens
82+
*/
83+
maxTokenThreshold?: number;
84+
/**
85+
* Model name used for token counting.
86+
* Default: the model's `modelId` (or the model string itself), falling back to "anthropic/claude-sonnet-4"
87+
*/
88+
modelName?: string;
89+
}
90+
| false;
5891
}
5992

6093
interface Logger {
@@ -327,6 +360,7 @@ export class Scout {
327360
tools: providedTools,
328361
getGithubAppContext,
329362
systemPrompt = defaultSystemPrompt,
363+
compaction: compactionConfig,
330364
}: BuildStreamTextParamsOptions): Promise<{
331365
model: LanguageModel;
332366
messages: ModelMessage[];
@@ -347,15 +381,63 @@ export class Scout {
347381
)()
348382
: undefined;
349383

384+
// Determine if compaction is enabled and get config values
385+
const compactionEnabled = compactionConfig !== false;
386+
const maxTokenThreshold =
387+
(compactionConfig !== false && compactionConfig?.maxTokenThreshold) ||
388+
DEFAULT_TOKEN_THRESHOLD;
389+
const warningThreshold =
390+
(compactionConfig !== false && compactionConfig?.warningThreshold) ||
391+
Math.floor(maxTokenThreshold * 0.8);
392+
const compactionModelName =
393+
(compactionConfig !== false && compactionConfig?.modelName) ||
394+
(typeof model === "object" && "modelId" in model
395+
? model.modelId
396+
: typeof model === "string"
397+
? model
398+
: "anthropic/claude-sonnet-4");
399+
350400
// Apply compaction if a compaction summary exists in the message history
351-
const compactedMessages = applyCompaction(messages);
401+
let compactedMessages = applyCompaction(messages);
352402
const wasCompacted = compactedMessages.length !== messages.length;
353403
if (wasCompacted) {
354404
this.logger.info(
355405
`Applied conversation compaction: ${messages.length} messages -> ${compactedMessages.length} messages`
356406
);
357407
}
358408

409+
// Check token count and inject warning message if needed
410+
let tokenCount: number | undefined;
411+
let compactionWarningInjected = false;
412+
if (compactionEnabled && compactedMessages.length > 0) {
413+
// We need to convert messages to count tokens accurately
414+
// For now, use a temporary conversion to count
415+
const tempConverted = convertToModelMessages(compactedMessages, {
416+
ignoreIncompleteToolCalls: true,
417+
});
418+
tokenCount = await countConversationTokens(
419+
tempConverted,
420+
compactionModelName
421+
);
422+
423+
if (tokenCount >= warningThreshold) {
424+
this.logger.warn(
425+
`Conversation approaching context limit: ${tokenCount.toLocaleString()} tokens (threshold: ${warningThreshold.toLocaleString()})`
426+
);
427+
428+
// Inject a compaction warning message at the end of the conversation
429+
const warningMessage = createCompactionWarningMessage(
430+
tokenCount,
431+
maxTokenThreshold
432+
);
433+
compactedMessages = [...compactedMessages, warningMessage];
434+
compactionWarningInjected = true;
435+
this.logger.info(
436+
"Injected compaction warning message to prompt model to compact conversation"
437+
);
438+
}
439+
}
440+
359441
const slackMetadata = getSlackMetadata(compactedMessages);
360442
const respondingInSlack =
361443
this.slack.app !== undefined && slackMetadata !== undefined;

0 commit comments

Comments
 (0)