diff --git a/bun.lock b/bun.lock index 51f8e99..8811d39 100644 --- a/bun.lock +++ b/bun.lock @@ -257,7 +257,7 @@ }, "packages/scout-agent": { "name": "@blink-sdk/scout-agent", - "version": "0.0.9", + "version": "0.0.10", "dependencies": { "@blink-sdk/compute": "^0.0.15", "@blink-sdk/github": "^0.0.22", @@ -265,6 +265,7 @@ "@blink-sdk/multiplexer": "^0.0.1", "@blink-sdk/slack": "^1.1.2", "@octokit/webhooks": "^14.1.3", + "ai-tokenizer": "^1.0.6", "exa-js": "^2.0.3", }, "devDependencies": { @@ -384,7 +385,7 @@ "@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.23", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.10" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZEBiiv1UhjGjBwUU63pFhLK5LCSlNDb1idY9K1oZHm5/Fda1cuTojf32tOp0opH0RPbPAN/F8fyyNjbU33n9Kw=="], - "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-cybb+k/3Kj9BX+Am1mun3dafZsHQLIzW2A4fu5FVTLSIGXXbcuXwXNNdYMGs+B0y6RYOQ8VHbf1QslMSDIxQMA=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.19", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-BwV7DU/lAm3Xn6iyyvZdWgVxgLu3SNXzl5y57gMvkW4nGhAOV5269IrJzQwGt03bb107sa6H6uJwWxc77zXoGA=="], "@ai-sdk/google": ["@ai-sdk/google@2.0.17", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.10" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-6LyuUrCZuiULg0rUV+kT4T2jG19oUntudorI4ttv1ARkSbwl8A39ue3rA487aDDy6fUScdbGFiV5Yv/o4gidVA=="], @@ -394,7 +395,7 @@ "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], - "@ai-sdk/provider-utils": 
["@ai-sdk/provider-utils@3.0.18", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ypv1xXMsgGcNKUP+hglKqtdDuMg68nWHucPPAhIENrbFAI+xCHiqPVN8Zllxyv1TNZwGWUghPxJXU+Mqps0YRQ=="], + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-W41Wc9/jbUVXVwCN/7bWa4IKe8MtxO3EyA0Hfhx6grnmiYlCvpI8neSYWFE0zScXJkgA/YK3BRybzgyiXuu6JA=="], "@ai-sdk/react": ["@ai-sdk/react@2.0.60", "", { "dependencies": { "@ai-sdk/provider-utils": "3.0.10", "ai": "5.0.60", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": "^3.25.76 || ^4.1.8" }, "optionalPeers": ["zod"] }, "sha512-Ev0MC0I7eDcCH4FnrHzK48g9bJjyF3F67MMq76qoVsbtcs6fGIO5RjmYgPoFeSo8/yQ5EM6i/14yfcD0oB+moA=="], @@ -1684,7 +1685,7 @@ "@types/mysql": ["@types/mysql@2.15.27", "", { "dependencies": { "@types/node": "*" } }, "sha512-YfWiV16IY0OeBfBCk8+hXKmdTKrKlwKN1MNKAPBu5JYxLwBEZl7QzeEpGnlZb3VMGJrrGmB84gXiH+ofs/TezA=="], - "@types/node": ["@types/node@25.0.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-rl78HwuZlaDIUSeUKkmogkhebA+8K1Hy7tddZuJ3D0xV8pZSfsYGTsliGUol1JPzu9EKnTxPC4L1fiWouStRew=="], + "@types/node": ["@types/node@25.0.2", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-gWEkeiyYE4vqjON/+Obqcoeffmk0NF15WSBwSs7zwVA2bAbTaE0SJ7P0WNGoJn8uE7fiaV5a7dKYIJriEqOrmA=="], "@types/normalize-package-data": ["@types/normalize-package-data@2.4.4", "", {}, "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA=="], @@ -1806,7 +1807,9 @@ "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, 
"sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], - "ai": ["ai@5.0.110", "", { "dependencies": { "@ai-sdk/gateway": "2.0.19", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZBq+5bvef4e5qoIG4U6NJ1UpCPWGjuaWERHXbHu2T2ND3c02nJ2zlnjm+N6zAAplQPxwqm7Sb16mrRX5uQNWtQ=="], + "ai": ["ai@5.0.113", "", { "dependencies": { "@ai-sdk/gateway": "2.0.21", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.19", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-26vivpSO/mzZj0k1Si2IpsFspp26ttQICHRySQiMrtWcRd5mnJMX2a8sG28vmZ38C+JUn1cWmfZrsLMxkSMw9g=="], + + "ai-tokenizer": ["ai-tokenizer@1.0.6", "", { "peerDependencies": { "ai": "^5.0.0" }, "optionalPeers": ["ai"] }, "sha512-GaakQFxen0pRH/HIA4v68ZM40llCH27HUYUSBLK+gVuZ57e53pYJe1xFvSTj4sJJjbWU92m1X6NjPWyeWkFDow=="], "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], @@ -3968,7 +3971,7 @@ "yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="], - "zod": ["zod@4.1.13", "", {}, "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig=="], + "zod": ["zod@4.2.1", "", {}, "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw=="], "zod-to-json-schema": ["zod-to-json-schema@3.24.6", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg=="], @@ -4032,6 +4035,8 @@ "@blink-sdk/scout-agent/tsdown": ["tsdown@0.3.1", "", { "dependencies": { "cac": "^6.7.14", "chokidar": 
"^4.0.1", "consola": "^3.2.3", "debug": "^4.3.7", "picocolors": "^1.1.1", "pkg-types": "^1.2.1", "rolldown": "nightly", "tinyglobby": "^0.2.10", "unconfig": "^0.6.0", "unplugin-isolated-decl": "^0.7.2", "unplugin-unused": "^0.2.3" }, "bin": { "tsdown": "bin/tsdown.js" } }, "sha512-5WLFU7f2NRnsez0jxi7m2lEQNPvBOdos0W8vHvKDnS6tYTfOfmZ5D2z/G9pFTQSjeBhoi6BFRMybc4LzCOKR8A=="], + "@blink.so/api/zod": ["zod@4.1.13", "", {}, "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig=="], + "@blink.so/compute-protocol-worker/@blink-sdk/compute-protocol": ["@blink-sdk/compute-protocol@0.0.2", "", { "peerDependencies": { "ws": ">= 8", "zod": ">= 4" } }, "sha512-QD89Y4b3EbZjncROb6kwUr1uQV4N3UD9q7Hp2PzL4A2BAzsqk50w7KfN9RxfDiZ3fU7Pectg71T4M8ZCwdJcdQ=="], "@blink.so/site/dotenv": ["dotenv@16.6.1", "", {}, "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow=="], @@ -4042,6 +4047,8 @@ "@blink/desktop/@blink.so/api": ["@blink.so/api@0.0.11", "", { "optionalDependencies": { "@blink-sdk/compute-protocol": ">= 0.0.2" }, "peerDependencies": { "ai": ">= 5", "react": ">= 18", "zod": ">= 4" }, "optionalPeers": ["react"] }, "sha512-4JW0fsGFn8IN5r+FpdbkqXkFqyCXQ8sDXoETdIBczLe3/+JP0Q2ItvN9XtR/eLNIshIL9Yz+gZtB6AVWQIcIWg=="], + "@blink/desktop/ai": ["ai@5.0.110", "", { "dependencies": { "@ai-sdk/gateway": "2.0.19", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZBq+5bvef4e5qoIG4U6NJ1UpCPWGjuaWERHXbHu2T2ND3c02nJ2zlnjm+N6zAAplQPxwqm7Sb16mrRX5uQNWtQ=="], + "@blink/desktop/esbuild": ["esbuild@0.25.10", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.25.10", "@esbuild/android-arm": "0.25.10", "@esbuild/android-arm64": "0.25.10", "@esbuild/android-x64": "0.25.10", "@esbuild/darwin-arm64": "0.25.10", "@esbuild/darwin-x64": "0.25.10", "@esbuild/freebsd-arm64": "0.25.10", "@esbuild/freebsd-x64": "0.25.10", 
"@esbuild/linux-arm": "0.25.10", "@esbuild/linux-arm64": "0.25.10", "@esbuild/linux-ia32": "0.25.10", "@esbuild/linux-loong64": "0.25.10", "@esbuild/linux-mips64el": "0.25.10", "@esbuild/linux-ppc64": "0.25.10", "@esbuild/linux-riscv64": "0.25.10", "@esbuild/linux-s390x": "0.25.10", "@esbuild/linux-x64": "0.25.10", "@esbuild/netbsd-arm64": "0.25.10", "@esbuild/netbsd-x64": "0.25.10", "@esbuild/openbsd-arm64": "0.25.10", "@esbuild/openbsd-x64": "0.25.10", "@esbuild/openharmony-arm64": "0.25.10", "@esbuild/sunos-x64": "0.25.10", "@esbuild/win32-arm64": "0.25.10", "@esbuild/win32-ia32": "0.25.10", "@esbuild/win32-x64": "0.25.10" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-9RiGKvCwaqxO2owP61uQ4BgNborAQskMR6QusfWzQqv7AZOg5oGehdY2pRJMTKuwxd1IDBP4rSbI5lHzU7SMsQ=="], "@blink/desktop/lucide-react": ["lucide-react@0.544.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-t5tS44bqd825zAW45UQxpG2CvcC4urOwn2TrwSH8u+MjeE+1NnWl6QqeQ/6NdjMqdOygyiT9p3Ev0p1NJykxjw=="], @@ -4886,6 +4893,10 @@ "@blink.so/site/next-auth/@auth/core": ["@auth/core@0.41.0", "", { "dependencies": { "@panva/hkdf": "^1.2.1", "jose": "^6.0.6", "oauth4webapi": "^3.3.0", "preact": "10.24.3", "preact-render-to-string": "6.5.11" }, "peerDependencies": { "@simplewebauthn/browser": "^9.0.1", "@simplewebauthn/server": "^9.0.2", "nodemailer": "^6.8.0" }, "optionalPeers": ["@simplewebauthn/browser", "@simplewebauthn/server", "nodemailer"] }, "sha512-Wd7mHPQ/8zy6Qj7f4T46vg3aoor8fskJm6g2Zyj064oQ3+p0xNZXAV60ww0hY+MbTesfu29kK14Zk5d5JTazXQ=="], + "@blink/desktop/ai/@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-cybb+k/3Kj9BX+Am1mun3dafZsHQLIzW2A4fu5FVTLSIGXXbcuXwXNNdYMGs+B0y6RYOQ8VHbf1QslMSDIxQMA=="], + + "@blink/desktop/ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.18", "", { 
"dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ypv1xXMsgGcNKUP+hglKqtdDuMg68nWHucPPAhIENrbFAI+xCHiqPVN8Zllxyv1TNZwGWUghPxJXU+Mqps0YRQ=="], + "@blink/desktop/esbuild/@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.25.10", "", { "os": "aix", "cpu": "ppc64" }, "sha512-0NFWnA+7l41irNuaSVlLfgNT12caWJVLzp5eAVhZ0z1qpxbockccEt3s+149rE64VUI3Ml2zt8Nv5JVc4QXTsw=="], "@blink/desktop/esbuild/@esbuild/android-arm": ["@esbuild/android-arm@0.25.10", "", { "os": "android", "cpu": "arm" }, "sha512-dQAxF1dW1C3zpeCDc5KqIYuZ1tgAdRXNoZP7vkBIRtKZPYe2xVr/d3SkirklCHudW1B45tGiUlz2pUWDfbDD4w=="], diff --git a/packages/scout-agent/lib/compaction.test.ts b/packages/scout-agent/lib/compaction.test.ts new file mode 100644 index 0000000..0d5b62b --- /dev/null +++ b/packages/scout-agent/lib/compaction.test.ts @@ -0,0 +1,625 @@ +/** biome-ignore-all lint/suspicious/noExplicitAny: testing */ +import { describe, expect, test } from "bun:test"; +import { + applyCompaction, + COMPACT_CONVERSATION_TOOL_NAME, + createCompactionMessage, + createCompactionTool, + findCompactionSummary, + prepareTruncatedMessages, +} from "./compaction"; +import type { Message } from "./types"; + +describe("compaction", () => { + describe("findCompactionSummary", () => { + test("returns null when no compaction exists", () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + { + id: "2", + role: "assistant", + parts: [{ type: "text", text: "Hi there!" 
}], + }, + ]; + + expect(findCompactionSummary(messages)).toBeNull(); + }); + + test("finds compaction summary in assistant message", () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + { + id: "2", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { + summary: "This is the summary of the conversation.", + compacted_at: "2024-01-01T00:00:00.000Z", + }, + } as any, + ], + }, + { + id: "3", + role: "user", + parts: [{ type: "text", text: "Continue" }], + }, + ]; + + const result = findCompactionSummary(messages); + expect(result).not.toBeNull(); + expect(result?.index).toBe(1); + expect(result?.summary).toBe("This is the summary of the conversation."); + }); + + test("finds most recent compaction when multiple exist", () => { + const messages: Message[] = [ + { + id: "1", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { summary: "First summary" }, + } as any, + ], + }, + { + id: "2", + role: "user", + parts: [{ type: "text", text: "More conversation" }], + }, + { + id: "3", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { summary: "Second summary" }, + } as any, + ], + }, + ]; + + const result = findCompactionSummary(messages); + expect(result?.index).toBe(2); + expect(result?.summary).toBe("Second summary"); + }); + + test("ignores compaction tool in non-output-available state", () => { + const messages: Message[] = [ + { + id: "1", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "input-available", + input: { summary: "Not yet complete" }, + } as any, + ], + }, + ]; + + expect(findCompactionSummary(messages)).toBeNull(); + }); + + test("returns preservedMessageIds when present in output", () => { + const preservedIds = 
["msg-4", "msg-5"]; + const messages: Message[] = [ + { + id: "1", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { + summary: "Emergency summary", + preservedMessageIds: preservedIds, + }, + } as any, + ], + }, + ]; + + const result = findCompactionSummary(messages); + expect(result).not.toBeNull(); + expect(result?.preservedMessageIds).toEqual(preservedIds); + }); + }); + + describe("applyCompaction", () => { + test("returns original messages when no compaction exists", () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + ]; + + const result = applyCompaction(messages); + expect(result).toEqual(messages); + }); + + test("replaces messages before compaction with summary", () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Old message 1" }], + }, + { + id: "2", + role: "assistant", + parts: [{ type: "text", text: "Old response 1" }], + }, + { + id: "3", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { summary: "Summary of old messages" }, + } as any, + ], + }, + { + id: "4", + role: "user", + parts: [{ type: "text", text: "New message" }], + }, + ]; + + const result = applyCompaction(messages); + + // Should have: summary message + new message (compaction message excluded) + expect(result.length).toBe(2); + + // First message should be the summary + expect(result[0]?.id).toBe("compaction-summary"); + expect(result[0]?.role).toBe("user"); + expect(result[0]?.parts[0]?.type).toBe("text"); + expect((result[0]?.parts[0] as { text: string }).text).toInclude( + "Summary of old messages" + ); + + // Should include messages after the compaction point (excluding compaction itself) + expect(result[1]?.id).toBe("4"); + }); + + test("keeps preserved messages by ID when preservedMessageIds is present", () 
=> { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Old message 1" }], + }, + { + id: "2", + role: "assistant", + parts: [{ type: "text", text: "Old response 1" }], + }, + { + id: "3", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { + summary: "Summary of old messages", + preservedMessageIds: ["4", "5"], // Preserve specific messages + }, + } as any, + ], + }, + { + id: "4", + role: "user", + parts: [{ type: "text", text: "Preserved message 1" }], + }, + { + id: "5", + role: "assistant", + parts: [{ type: "text", text: "Preserved message 2" }], + }, + { + id: "6", + role: "user", + parts: [{ type: "text", text: "New message after compaction" }], + }, + ]; + + const result = applyCompaction(messages); + + // Should have: summary message + preserved messages (4, 5) + new message (6) + // Compaction tool call (3) is excluded since summary already contains the info + expect(result.length).toBe(4); + + // First message should be the summary + expect(result[0]?.id).toBe("compaction-summary"); + expect((result[0]?.parts[0] as { text: string }).text).toInclude( + "Summary of old messages" + ); + + // Should include messages after compaction point (excluding the compaction itself) + expect(result[1]?.id).toBe("4"); + expect(result[2]?.id).toBe("5"); + expect(result[3]?.id).toBe("6"); // new message after compaction is preserved + }); + }); + + describe("createCompactionTool", () => { + test("creates tool with correct name and schema", () => { + const tools = createCompactionTool(); + + expect(tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); + expect(tools[COMPACT_CONVERSATION_TOOL_NAME].description).toInclude( + "Compact the conversation history" + ); + }); + + test("tool execute returns summary in result", async () => { + const tools = createCompactionTool(); + const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; + + const result = 
(await compactionTool.execute?.( + { summary: "Test summary content" }, + { abortSignal: new AbortController().signal } as any + )) as { summary: string; compacted_at: string; message: string }; + + expect(result.summary).toBe("Test summary content"); + expect(result.compacted_at).toBeDefined(); + expect(result.message).toInclude("compacted"); + }); + + test("tool execute includes preservedMessageIds when provided", async () => { + const preservedIds = ["msg-4", "msg-5", "msg-6"]; + const tools = createCompactionTool(preservedIds); + const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; + + const result = (await compactionTool.execute?.( + { summary: "Emergency summary" }, + { abortSignal: new AbortController().signal } as any + )) as { + summary: string; + compacted_at: string; + message: string; + preservedMessageIds?: string[]; + }; + + expect(result.summary).toBe("Emergency summary"); + expect(result.preservedMessageIds).toEqual(preservedIds); + }); + + test("tool execute does not include preservedMessageIds when not provided", async () => { + const tools = createCompactionTool(); + const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; + + const result = (await compactionTool.execute?.( + { summary: "Normal summary" }, + { abortSignal: new AbortController().signal } as any + )) as { + summary: string; + compacted_at: string; + message: string; + preservedMessageIds?: string[]; + }; + + expect(result.preservedMessageIds).toBeUndefined(); + }); + }); + + describe("createCompactionMessage", () => { + test("creates compaction message with token info when provided", () => { + const message = createCompactionMessage({ + tokenCount: 80000, + threshold: 100000, + }); + + expect(message.id).toStartWith("compaction-request-"); + expect(message.role).toBe("user"); + const textPart = message.parts[0] as { text: string }; + expect(textPart.text).toInclude("80%"); + expect(textPart.text).toInclude("80,000"); + 
expect(textPart.text).toInclude("compact_conversation"); + }); + + test("creates compaction message without token info when not provided", () => { + const message = createCompactionMessage(); + + expect(message.id).toStartWith("compaction-request-"); + expect(message.role).toBe("user"); + const textPart = message.parts[0] as { text: string }; + expect(textPart.text).toInclude("compact_conversation"); + expect(textPart.text).not.toInclude("%"); // No percentage + }); + }); + + describe("prepareTruncatedMessages", () => { + test("returns empty arrays for empty messages", async () => { + const result = await prepareTruncatedMessages({ + messages: [], + tokenLimit: 1000, + modelName: "anthropic/claude-sonnet-4", + }); + + expect(result.messagesToProcess).toEqual([]); + expect(result.messagesToPreserve).toEqual([]); + }); + + test("includes all messages when under token limit", async () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + { + id: "2", + role: "assistant", + parts: [{ type: "text", text: "Hi there!" }], + }, + ]; + + const result = await prepareTruncatedMessages({ + messages, + tokenLimit: 100000, // Very high limit + modelName: "anthropic/claude-sonnet-4", + }); + + expect(result.messagesToProcess.length).toBe(2); + expect(result.messagesToPreserve.length).toBe(0); + }); + + test("truncates messages when over token limit", async () => { + // Create messages with enough content to have measurable tokens + const messages: Message[] = Array.from({ length: 10 }, (_, i) => ({ + id: `${i + 1}`, + role: i % 2 === 0 ? 
"user" : "assistant", + parts: [ + { + type: "text", + text: `This is message number ${i + 1} with some additional content to increase token count.`, + }, + ], + })) as Message[]; + + const result = await prepareTruncatedMessages({ + messages, + tokenLimit: 100, // Low limit to force truncation + modelName: "anthropic/claude-sonnet-4", + }); + + // Should have truncated - not all messages in messagesToProcess + expect(result.messagesToProcess.length).toBeLessThan(10); + expect(result.messagesToProcess.length).toBeGreaterThan(0); + + // The rest should be in messagesToPreserve + expect( + result.messagesToProcess.length + result.messagesToPreserve.length + ).toBe(10); + + // First message should be in messagesToProcess (oldest first) + expect(result.messagesToProcess[0]?.id).toBe("1"); + }); + + test("includes at least one message even if it exceeds token limit", async () => { + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [ + { + type: "text", + text: "This is a message with enough content to exceed a very small token limit.", + }, + ], + }, + ]; + + const result = await prepareTruncatedMessages({ + messages, + tokenLimit: 1, // Impossibly small limit + modelName: "anthropic/claude-sonnet-4", + }); + + // Should still include the one message + expect(result.messagesToProcess.length).toBe(1); + expect(result.messagesToPreserve.length).toBe(0); + }); + + }); + + describe("processCompaction", () => { + const noopLogger = { + info: () => {}, + warn: () => {}, + error: () => {}, + }; + + test("returns empty compactionTool when under soft threshold", async () => { + const { processCompaction } = await import("./compaction"); + + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + ]; + + const result = await processCompaction({ + messages, + softTokenThreshold: 1_000_000, // Very high threshold + hardTokenThreshold: 1_100_000, + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + 
}); + + expect(result.messages).toEqual(messages); + expect(Object.keys(result.compactionTool)).toHaveLength(0); + }); + + test("returns compactionTool when soft threshold exceeded", async () => { + const { processCompaction } = await import("./compaction"); + + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [ + { type: "text", text: "Hello world, this is a test message." }, + ], + }, + ]; + + const result = await processCompaction({ + messages, + softTokenThreshold: 1, // Very low threshold + hardTokenThreshold: 100_000, // High hard threshold so no truncation + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + }); + + // Should have compaction tool + expect(Object.keys(result.compactionTool)).toHaveLength(1); + expect( + result.compactionTool[COMPACT_CONVERSATION_TOOL_NAME] + ).toBeDefined(); + + // Should have injected compaction message + expect(result.messages.length).toBe(2); + const compactionRequest = result.messages.find((m) => + m.id.startsWith("compaction-request-") + ); + expect(compactionRequest).toBeDefined(); + }); + + test("applies existing compaction summary", async () => { + const { processCompaction } = await import("./compaction"); + + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Old message" }], + }, + { + id: "2", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + state: "output-available", + output: { summary: "Summary of conversation" }, + } as any, + ], + }, + { + id: "3", + role: "user", + parts: [{ type: "text", text: "New message" }], + }, + ]; + + const result = await processCompaction({ + messages, + softTokenThreshold: 1_000_000, // High threshold so no new compaction + hardTokenThreshold: 1_100_000, + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + }); + + // Should have applied compaction (summary + new message, compaction tool call excluded) + expect(result.messages.length).toBe(2); + 
expect(result.messages[0]?.id).toBe("compaction-summary"); + expect(result.messages[1]?.id).toBe("3"); + }); + + test("throws error when soft threshold >= hard threshold", async () => { + const { processCompaction } = await import("./compaction"); + + const messages: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + ]; + + await expect( + processCompaction({ + messages, + softTokenThreshold: 100_000, + hardTokenThreshold: 100_000, // Equal to soft - invalid + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + }) + ).rejects.toThrow("Soft token threshold"); + + await expect( + processCompaction({ + messages, + softTokenThreshold: 200_000, + hardTokenThreshold: 100_000, // Less than soft - invalid + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + }) + ).rejects.toThrow("Soft token threshold"); + }); + + test("truncates messages at hard threshold and preserves rest", async () => { + const { processCompaction } = await import("./compaction"); + + // Create enough messages to exceed soft threshold but require truncation at hard + // Each message is ~25 tokens, so 20 messages = ~500 tokens + const messages: Message[] = Array.from({ length: 20 }, (_, i) => ({ + id: `${i + 1}`, + role: i % 2 === 0 ? 
"user" : "assistant", + parts: [ + { + type: "text", + text: `Message ${i + 1}: This is a longer message with additional content to generate more tokens for testing purposes.`, + }, + ], + })) as Message[]; + + const result = await processCompaction({ + messages, + softTokenThreshold: 1, // Trigger compaction immediately + hardTokenThreshold: 300, // ~12 messages worth, forces truncation + model: "anthropic/claude-sonnet-4", + logger: noopLogger, + }); + + // Should have compaction tool with preserved message IDs + expect(Object.keys(result.compactionTool)).toHaveLength(1); + + // Messages should be truncated (fewer than original 20 + compaction message) + // With 300 token limit and ~25 tokens per message, expect ~12 messages + compaction = 13 + expect(result.messages.length).toBeLessThan(21); + expect(result.messages.length).toBeGreaterThan(0); + + // Last message should be compaction request + const lastMessage = result.messages[result.messages.length - 1]; + expect(lastMessage?.id).toMatch(/^compaction-request-/); + }); + }); +}); diff --git a/packages/scout-agent/lib/compaction.ts b/packages/scout-agent/lib/compaction.ts new file mode 100644 index 0000000..ef1cc38 --- /dev/null +++ b/packages/scout-agent/lib/compaction.ts @@ -0,0 +1,435 @@ +import { + convertToModelMessages, + type LanguageModel, + type ModelMessage, + type Tool, + tool, +} from "ai"; +import { z } from "zod"; +import type { Logger, Message } from "./types"; + +/** + * Tool name for conversation compaction. + * Used to identify compaction tool results in message history. + */ +export const COMPACT_CONVERSATION_TOOL_NAME = "compact_conversation" as const; + +/** + * Default soft token threshold for triggering compaction. + * When conversation tokens reach this limit, compaction is triggered. + */ +export const DEFAULT_SOFT_TOKEN_THRESHOLD = 180_000; + +/** + * Default hard token threshold for compaction. + * Messages beyond this limit are excluded from compaction and preserved. 
+ * Must be greater than soft threshold.
+ */
+export const DEFAULT_HARD_TOKEN_THRESHOLD = 190_000;
+
+/**
+ * Get the model configuration for token counting.
+ *
+ * Resolution order:
+ * 1. an exact match on `modelName`,
+ * 2. Claude Sonnet 4 when the name mentions "anthropic" or "claude",
+ * 3. GPT-5 when the name mentions "openai" or "gpt",
+ * 4. Claude Sonnet 4 for everything else.
+ *
+ * @param models - Model configurations keyed by model name.
+ * @param modelName - Name of the model to look up.
+ * @returns The configuration stored under the resolved key.
+ */
+function getModelConfig(models: Record<string, unknown>, modelName: string) {
+  // Try to find exact match first. Only the registry's own keys count: the
+  // `in` operator also walks the prototype chain, so a name such as
+  // "constructor" or "toString" would resolve to an Object.prototype member.
+  if (Object.prototype.hasOwnProperty.call(models, modelName)) {
+    return models[modelName];
+  }
+  // Default to Claude Sonnet for Anthropic models
+  if (modelName.includes("anthropic") || modelName.includes("claude")) {
+    return models["anthropic/claude-sonnet-4"];
+  }
+  // Default to GPT-5 for OpenAI models
+  if (modelName.includes("openai") || modelName.includes("gpt")) {
+    return models["openai/gpt-5"];
+  }
+  // Fallback
+  return models["anthropic/claude-sonnet-4"];
+}
+
+/**
+ * Result of counting tokens for messages.
+ */
+export interface TokenCountResult {
+  /** Total tokens across all messages */
+  total: number;
+  /** Token count for each message */
+  perMessage: number[];
+}
+
+/**
+ * Counts tokens for messages using ai-tokenizer.
+ * Returns both total and per-message token counts for efficient processing.
+ */
+export async function countConversationTokens(
+  messages: ModelMessage[],
+  modelName: string = "anthropic/claude-sonnet-4"
+): Promise<TokenCountResult> {
+  // we import the modules dynamically because otherwise the
+  // agent starts up super slow and blink cloud times out during deployment.
+  // The three entry points do not depend on each other, so they are loaded
+  // concurrently instead of one after another.
+  const [aiTokenizer, encoding, tokenizerSdk] = await Promise.all([
+    import("ai-tokenizer"),
+    import("ai-tokenizer/encoding/o200k_base"),
+    import("ai-tokenizer/sdk"),
+  ]);
+
+  // Names missing from the registry resolve to a family/default configuration.
+  const model = getModelConfig(aiTokenizer.models, modelName);
+  // NOTE: the o200k_base encoding is used for every model name.
+  const tokenizer = new aiTokenizer.Tokenizer(encoding);
+
+  const result = tokenizerSdk.count({
+    // biome-ignore lint/suspicious/noExplicitAny: weird typing error
+    tokenizer: tokenizer as any,
+    // biome-ignore lint/suspicious/noExplicitAny: weird typing error
+    model: model as any,
+    messages,
+  });
+
+  // Flatten the per-message results to plain numbers, in input order.
+  return {
+    total: result.total,
+    perMessage: result.messages.map((m) => m.total),
+  };
+}
+
+/**
+ * Finds the most recent compaction summary in the message history.
+ * Returns the index of the message containing the compaction, the summary text,
+ * and optionally the preserved message IDs.
+ */
+export function findCompactionSummary(messages: Message[]): {
+  index: number;
+  summary: string;
+  preservedMessageIds?: string[];
+} | null {
+  // Part type that marks a call to our compaction tool
+  const compactionPartType = `tool-${COMPACT_CONVERSATION_TOOL_NAME}`;
+
+  // Walk backwards so the newest compaction is the one that is returned
+  for (let position = messages.length - 1; position >= 0; position -= 1) {
+    const candidate = messages[position];
+    if (candidate?.role !== "assistant") {
+      continue;
+    }
+
+    for (const part of candidate.parts) {
+      if (part.type !== compactionPartType) {
+        continue;
+      }
+
+      const { state, output } = part as {
+        state: string;
+        output?: { summary?: string; preservedMessageIds?: string[] };
+      };
+      // Only a finished call that actually produced a summary counts
+      if (state !== "output-available" || !output?.summary) {
+        continue;
+      }
+
+      return {
+        index: position,
+        summary: output.summary,
+        preservedMessageIds: output.preservedMessageIds,
+      };
+    }
+  }
+
+  // No completed compaction anywhere in the history
+  return null;
+}
+
+/**
+ * Processes messages to apply compaction if a compaction summary exists.
+ * Returns messages with history before the compaction replaced by a summary message.
+ */
+export function applyCompaction(messages: Message[]): Message[] {
+  const compaction = findCompactionSummary(messages);
+  if (compaction === null) {
+    // Nothing was compacted: hand back the input untouched
+    return messages;
+  }
+
+  // Synthetic user message that stands in for the compacted history
+  const summaryMessage: Message = {
+    id: "compaction-summary",
+    role: "user",
+    parts: [
+      {
+        type: "text",
+        text: `[CONVERSATION SUMMARY - Previous messages have been compacted to save context space]\n\n${compaction.summary}\n\n[END OF SUMMARY - Conversation continues below]`,
+      },
+    ],
+  };
+
+  // Everything after the compaction message; the tool call itself is dropped
+  const tail = messages.slice(compaction.index + 1);
+
+  const preservedIds = compaction.preservedMessageIds;
+  if (!preservedIds || preservedIds.length === 0) {
+    // Normal compaction: summary followed by whatever came afterwards
+    return [summaryMessage, ...tail];
+  }
+
+  // Hard-threshold truncation: restore the preserved messages by ID, skipping
+  // any that already appear in the tail so nothing is emitted twice
+  const wantedIds = new Set(preservedIds);
+  const tailIds = new Set(tail.map((m) => m.id));
+  const restored = messages.filter(
+    (m) => wantedIds.has(m.id) && !tailIds.has(m.id)
+  );
+
+  return [summaryMessage, ...restored, ...tail];
+}
+
+/**
+ * Creates the compact_conversation tool.
+ * This tool should be called by the model when the conversation is getting too long.
+ *
+ * @param preservedMessageIds - Optional array of message IDs that should be preserved
+ * after compaction. Used during emergency compaction to track which recent messages
+ * were not sent to the model but should be restored after the summary.
+ */
+// NOTE(review): the return annotation below is a bare `Record`; its type arguments
+// look like they were lost in this copy of the file - confirm against the real source.
+export function createCompactionTool(
+  preservedMessageIds?: string[]
+): Record {
+  const compactionTool = tool({
+    description: `Compact the conversation history to save context space. Call this tool when instructed that the conversation is approaching context limits. Provide a detailed and thorough summary that captures:
+- The main topics discussed
+- Key decisions made
+- Important code changes or file modifications (include file paths and what was changed)
+- Any ongoing tasks or action items
+- Critical context needed to continue the conversation
+- Relevant technical details, configurations, or environment information
+- Any errors encountered and how they were resolved
+
+Be thorough and detailed. This summary will replace the earlier conversation history, so include all information needed to continue effectively.`,
+    inputSchema: z.object({
+      summary: z
+        .string()
+        .describe(
+          "A detailed and thorough summary of the conversation so far, including all important context needed to continue effectively."
+        ),
+    }),
+    execute: async ({ summary }) => {
+      // The tool does no work of its own: it echoes the summary back as the tool
+      // result, which applyCompaction() reads from the history on later calls.
+      const result = {
+        summary,
+        compacted_at: new Date().toISOString(),
+        message:
+          "Conversation history has been compacted. The summary will be used to maintain context in future messages.",
+      };
+
+      // Record the preserved IDs only when there are some to restore.
+      if (preservedMessageIds && preservedMessageIds.length > 0) {
+        return { ...result, preservedMessageIds };
+      }
+      return result;
+    },
+  });
+
+  return { [COMPACT_CONVERSATION_TOOL_NAME]: compactionTool };
+}
+
+/**
+ * Creates a compaction request message asking the model to summarize the conversation.
+ * Uses a consistent ID ("compaction-request") for retry detection.
+ */
+// NOTE(review): the doc comment above promises a consistent "compaction-request" ID,
+// but the ID built below carries a Date.now() suffix, so it differs on every call.
+// Confirm which behavior retry detection actually relies on.
+export function createCompactionMessage(options?: {
+  tokenCount?: number;
+  threshold?: number;
+}): Message {
+  // Usage details are only added when both numbers are present and non-zero,
+  // which also keeps the division below away from a zero threshold.
+  let contextInfo = "";
+  if (options?.tokenCount && options?.threshold) {
+    const percentUsed = Math.round(
+      (options.tokenCount / options.threshold) * 100
+    );
+    contextInfo = `\n\nThe conversation has used approximately ${percentUsed}% of the available context (${options.tokenCount.toLocaleString()} tokens).`;
+  }
+
+  return {
+    id: `compaction-request-${Date.now()}`,
+    role: "user",
+    parts: [
+      {
+        type: "text",
+        // The tool name comes from the shared constant so that the notice always
+        // names the tool that createCompactionTool() actually registers.
+        text: `[SYSTEM NOTICE - CONTEXT LIMIT]${contextInfo}
+
+To prevent context overflow errors, please call the \`${COMPACT_CONVERSATION_TOOL_NAME}\` tool NOW to summarize the conversation history.
+
+Provide a detailed and thorough summary that captures all important context, decisions, code changes, file paths, and ongoing tasks. Do not leave out important details.`,
+      },
+    ],
+  };
+}
+
+/**
+ * Options for preparing truncated messages.
+ */
+export interface PrepareTruncatedMessagesOptions {
+  /** All messages to consider for truncation */
+  messages: Message[];
+  /** Maximum token count for messages to process */
+  tokenLimit: number;
+  /** Model name for token counting */
+  modelName: string;
+}
+
+/**
+ * Result of preparing truncated messages.
+ */
+export interface PrepareTruncatedMessagesResult {
+  /** Messages to send for summarization (older messages, within token limit) */
+  messagesToProcess: Message[];
+  /** Messages to preserve and restore after compaction */
+  messagesToPreserve: Message[];
+}
+
+/**
+ * Prepares messages for a truncated compaction attempt.
+ * Accumulates messages from the start (oldest first) until adding more would exceed the token limit.
+ *
+ * @returns Messages split into those to process (summarize) and those to preserve
+ */
+export async function prepareTruncatedMessages(
+  options: PrepareTruncatedMessagesOptions
+): Promise<PrepareTruncatedMessagesResult> {
+  const { messages, tokenLimit, modelName } = options;
+
+  if (messages.length === 0) {
+    return { messagesToProcess: [], messagesToPreserve: [] };
+  }
+
+  // Convert every UI message on its own. One UI message does not always become
+  // exactly one model message (an assistant message with tool calls can expand to
+  // several), so the expansion size of each one has to be known in order to map
+  // token counts back onto the `messages` array.
+  const convertedPerMessage = messages.map((message) =>
+    convertToModelMessages([message], {
+      ignoreIncompleteToolCalls: true,
+    })
+  );
+
+  // Count once over the flattened conversation.
+  // NOTE(review): assumes `perMessage` holds one entry per model message passed in,
+  // in the same order - confirm against countConversationTokens.
+  const { perMessage } = await countConversationTokens(
+    convertedPerMessage.flat(),
+    modelName
+  );
+
+  // Accumulate tokens UI message by UI message until the limit would be exceeded.
+  let splitPoint = 0;
+  let cumulativeTokens = 0;
+  let modelMessageIndex = 0;
+
+  for (let i = 0; i < convertedPerMessage.length; i++) {
+    const expansionSize = convertedPerMessage[i]?.length ?? 0;
+    for (let j = 0; j < expansionSize; j++) {
+      cumulativeTokens += perMessage[modelMessageIndex + j] ?? 0;
+    }
+    modelMessageIndex += expansionSize;
+
+    if (cumulativeTokens > tokenLimit) {
+      // Adding this message would exceed the limit
+      break;
+    }
+    splitPoint = i + 1;
+  }
+
+  // Even when the first message alone exceeds the limit, something has to be
+  // sent for summarization, so always process at least one message.
+  if (splitPoint === 0) {
+    splitPoint = 1;
+  }
+
+  return {
+    messagesToProcess: messages.slice(0, splitPoint),
+    messagesToPreserve: messages.slice(splitPoint),
+  };
+}
+
+/**
+ * Options for processing compaction.
+ */
+export interface ProcessCompactionOptions {
+  messages: Message[];
+  /** Soft threshold - triggers compaction when reached */
+  softTokenThreshold: number;
+  /** Hard threshold - max tokens to send for compaction; rest are preserved */
+  hardTokenThreshold: number;
+  model: LanguageModel | string;
+  logger: Logger;
+}
+
+/**
+ * Result of processing compaction.
+ */
+export interface ProcessCompactionResult {
+  messages: Message[];
+  // NOTE(review): bare `Record` - its type arguments look like they were lost in
+  // this copy of the file; confirm against the real source.
+  compactionTool: Record;
+}
+
+/**
+ * Extracts model name from a LanguageModel or string.
+ */ +function getModelName(model: LanguageModel | string): string { + if (typeof model === "string") { + return model; + } + if ("modelId" in model) { + return model.modelId; + } + return "anthropic/claude-sonnet-4"; +} + +/** + * Processes messages for compaction. + * Applies any existing compaction summary, checks token count against soft threshold, + * and truncates at hard threshold when compacting. + */ +export async function processCompaction( + options: ProcessCompactionOptions +): Promise { + const { messages, softTokenThreshold, hardTokenThreshold, model, logger } = + options; + + // Validate thresholds + if (softTokenThreshold >= hardTokenThreshold) { + throw new Error( + `Soft token threshold (${softTokenThreshold}) must be less than hard token threshold (${hardTokenThreshold})` + ); + } + + const modelName = getModelName(model); + + // Apply compaction if a compaction summary exists in the message history + const compactedMessages = applyCompaction(messages); + if (compactedMessages.length === 0) { + return { messages: [], compactionTool: {} }; + } + + // Check token count and handle compaction + let preservedMessageIds: string[] | undefined; + + // We need to convert messages to count tokens accurately + const tempConverted = convertToModelMessages(compactedMessages, { + ignoreIncompleteToolCalls: true, + }); + const { total: tokenCount } = await countConversationTokens( + tempConverted, + modelName + ); + + if (tokenCount < softTokenThreshold) { + return { messages: compactedMessages, compactionTool: {} }; + } + + // Soft threshold reached - trigger compaction + logger.info( + `Conversation approaching context limit: ${tokenCount.toLocaleString()} tokens (soft threshold: ${softTokenThreshold.toLocaleString()})` + ); + + // Truncate messages at hard threshold to ensure compaction request fits + const { messagesToProcess, messagesToPreserve } = + await prepareTruncatedMessages({ + messages: compactedMessages, + tokenLimit: hardTokenThreshold, + 
modelName, + }); + + // Store preserved message IDs for the compaction tool result + if (messagesToPreserve.length > 0) { + preservedMessageIds = messagesToPreserve.map((m) => m.id); + logger.info( + `Compaction: sending ${messagesToProcess.length} messages for summarization, preserving ${messagesToPreserve.length} recent messages` + ); + } + + return { + messages: [ + ...messagesToProcess, + createCompactionMessage({ + tokenCount, + threshold: softTokenThreshold, + }), + ], + compactionTool: createCompactionTool(preservedMessageIds), + }; +} diff --git a/packages/scout-agent/lib/core.test.ts b/packages/scout-agent/lib/core.test.ts index 7f10136..0940bf6 100644 --- a/packages/scout-agent/lib/core.test.ts +++ b/packages/scout-agent/lib/core.test.ts @@ -17,7 +17,7 @@ import { mockCoderWorkspace, noopLogger, } from "./compute/test-utils"; -import { type Message, Scout } from "./index"; +import { COMPACT_CONVERSATION_TOOL_NAME, type Message, Scout } from "./index"; import { createMockBlinkApiServer, withBlinkApiUrl } from "./test-helpers"; // Add async iterator support to ReadableStream for testing @@ -948,3 +948,474 @@ describe("coder integration", () => { expect(mockClient.getAppHost).toHaveBeenCalled(); }); }); + +describe("compaction", () => { + test("buildStreamTextParams does not include compaction tool when under threshold", async () => { + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: noopLogger, + }); + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + ], + model: newMockModel({ textResponse: "test" }), + }); + + // Verify compaction tool is NOT included when under threshold + expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); + }); + + test("buildStreamTextParams applies existing compaction summary", async () => { + const infoLogs: string[] = []; + const mockLogger = { + 
...noopLogger, + info: (...args: unknown[]) => { + infoLogs.push(args.map(String).join(" ")); + }, + }; + + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: mockLogger, + }); + + // Create messages with an existing compaction summary + const messagesWithCompaction: Message[] = [ + { + id: "1", + role: "user", + parts: [{ type: "text", text: "Old message 1" }], + }, + { + id: "2", + role: "assistant", + parts: [{ type: "text", text: "Old response 2" }], + }, + { + id: "3", + role: "user", + parts: [{ type: "text", text: "Old message 3" }], + }, + { + id: "4", + role: "assistant", + parts: [{ type: "text", text: "Old response 4" }], + }, + { + id: "5", + role: "user", + parts: [{ type: "text", text: "Old message 5" }], + }, + { + id: "6", + role: "assistant", + parts: [{ type: "text", text: "Old response 6" }], + }, + { + id: "7", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + toolCallId: "tool-call-1", + state: "output-available", + input: { summary: "Summary of old messages" }, + output: { summary: "Summary of old messages" }, + } as unknown as Message["parts"][number], + ], + }, + { + id: "8", + role: "user", + parts: [{ type: "text", text: "New message after compaction" }], + }, + ]; + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: messagesWithCompaction, + model: newMockModel({ textResponse: "test" }), + // Disable threshold to avoid token counting affecting message count + compaction: { + softThreshold: Number.MAX_SAFE_INTEGER - 1, + hardThreshold: Number.MAX_SAFE_INTEGER, + }, + }); + + // Verify messages were processed: should have system + summary + new msg + // The converted messages include: system prompt, compaction-summary user msg, and the new user msg + // (compaction tool call is excluded since the summary already contains the info) + expect(params.messages.length).toBe(3); + }); + + test("buildStreamTextParams injects 
compaction message when threshold exceeded", async () => { + const warnLogs: string[] = []; + const infoLogs: string[] = []; + const mockLogger = { + ...noopLogger, + warn: (...args: unknown[]) => { + warnLogs.push(args.map(String).join(" ")); + }, + info: (...args: unknown[]) => { + infoLogs.push(args.map(String).join(" ")); + }, + }; + + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: mockLogger, + }); + + // Create a message that will exceed a very low threshold + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [ + { + id: "1", + role: "user", + parts: [ + { type: "text", text: "Hello world, this is a test message." }, + ], + }, + ], + model: newMockModel({ textResponse: "test" }), + compaction: { + // Set a very low threshold so any message exceeds it + softThreshold: 1, + hardThreshold: 100_000, // High hard threshold so no truncation + }, + }); + + // Verify compaction message was injected (system + user + compaction request = 3 messages) + expect(params.messages.length).toBe(3); + + // Check that the last message contains compaction request + const compactionRequest = params.messages.find( + (m) => + m.role === "user" && + (typeof m.content === "string" + ? 
m.content.includes("CONTEXT LIMIT") + : Array.isArray(m.content) && + m.content.some( + (c) => + c.type === "text" && + (c as { text: string }).text.includes("CONTEXT LIMIT") + )) + ); + expect(compactionRequest).toBeDefined(); + + // Verify compaction tool IS available when compaction is triggered + expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); + }); + + test("buildStreamTextParams respects compaction: false to disable", async () => { + const warnLogs: string[] = []; + const mockLogger = { + ...noopLogger, + warn: (...args: unknown[]) => { + warnLogs.push(args.map(String).join(" ")); + }, + }; + + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: mockLogger, + }); + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [ + { + id: "1", + role: "user", + parts: [ + { type: "text", text: "Hello world, this is a test message." }, + ], + }, + ], + model: newMockModel({ textResponse: "test" }), + compaction: false, + }); + + // Compaction tool should NOT be available when compaction is disabled + expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); + + // No warning should be logged even with messages + const warningLog = warnLogs.find((l) => + l.includes("approaching context limit") + ); + expect(warningLog).toBeUndefined(); + + // Only system + user message (no warning injected) + expect(params.messages.length).toBe(2); + }); + + test("buildStreamTextParams truncates messages at hard threshold during compaction", async () => { + const warnLogs: string[] = []; + const infoLogs: string[] = []; + const mockLogger = { + ...noopLogger, + warn: (...args: unknown[]) => { + warnLogs.push(args.map(String).join(" ")); + }, + info: (...args: unknown[]) => { + infoLogs.push(args.map(String).join(" ")); + }, + }; + + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: mockLogger, + }); + + // Create many messages that will exceed soft 
threshold and require truncation at hard + const messages: Message[] = Array.from({ length: 20 }, (_, i) => ({ + id: `${i + 1}`, + role: i % 2 === 0 ? "user" : "assistant", + parts: [ + { + type: "text", + text: `Message ${i + 1}: This is a longer message with additional content to generate more tokens for testing purposes. ${Array(100).fill("abcdefg").join("")}`, + }, + ], + })) as Message[]; + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages, + model: newMockModel({ textResponse: "test" }), + compaction: { + // Low soft threshold to trigger compaction + softThreshold: 1, + // Low hard threshold to force truncation + hardThreshold: 500, + }, + }); + + // Verify info log about truncation (when preserving messages) + const truncationLog = infoLogs.find((l) => + l.includes("Compaction: sending") + ); + expect(truncationLog).toBeDefined(); + + // Verify compaction tool IS available + expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); + + // Verify that messages were truncated (not all 20 messages + system) + // Should have: system + truncated messages + compaction request + expect(params.messages.length).toBeLessThan(10); + + // Verify compaction request message is present + const compactionRequest = params.messages.find( + (m) => + m.role === "user" && + (typeof m.content === "string" + ? 
m.content.includes("CONTEXT LIMIT") + : Array.isArray(m.content) && + m.content.some( + (c) => + c.type === "text" && + (c as { text: string }).text.includes("CONTEXT LIMIT") + )) + ); + expect(compactionRequest).toBeDefined(); + }); + + test("compaction loop: after model summarizes, second call does not trigger another compaction", async () => { + const infoLogs: string[] = []; + const mockLogger = { + ...noopLogger, + info: (...args: unknown[]) => { + infoLogs.push(args.map(String).join(" ")); + }, + }; + + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: mockLogger, + }); + + // Use thresholds that will be exceeded by original messages but not by compacted ones + // Original messages: ~10 messages with 700 chars each = high token count + // After compaction: summary + preserved messages should be under soft threshold + const softThreshold = 2000; + const hardThreshold = 3000; + + // Step 1: Create large messages that will exceed soft threshold + // Each message has ~700 characters of filler to generate significant tokens + const filler = Array(100).fill("abcdefg").join(""); + const originalMessages: Message[] = Array.from({ length: 10 }, (_, i) => ({ + id: `${i + 1}`, + role: i % 2 === 0 ? 
"user" : "assistant", + parts: [ + { + type: "text", + text: `Message ${i + 1}: ${filler}`, + }, + ], + })) as Message[]; + + // Create a mock model that returns a tool call to compact_conversation + // The tool is wrapped with withModelIntent, so input needs model_intent and properties + const summaryText = "Brief summary of the conversation."; + const mockModelWithToolCall = new MockLanguageModelV2({ + doStream: async () => { + return { + stream: simulateReadableStream({ + chunks: [ + { + type: "tool-call" as const, + toolName: COMPACT_CONVERSATION_TOOL_NAME, + toolCallId: "tool-call-1", + input: JSON.stringify({ + model_intent: "Compacting conversation history", + properties: { summary: summaryText }, + }), + }, + { + type: "finish" as const, + finishReason: "tool-calls" as const, + logprobs: undefined, + usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 }, + }, + ], + }), + }; + }, + }); + + // First call - should trigger compaction, model responds with tool call + const firstParams = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: originalMessages, + model: mockModelWithToolCall, + compaction: { softThreshold, hardThreshold }, + }); + + // Verify compaction was triggered + expect(firstParams.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); + + // Execute streamText and wait for completion (including tool execution) + const firstResult = streamText(firstParams); + + // Wait for the full result including tool calls and their results + const toolCalls = await firstResult.toolCalls; + const toolResults = await firstResult.toolResults; + + // Verify the model called the compaction tool + expect(toolCalls).toHaveLength(1); + expect(toolCalls[0]?.toolName).toBe(COMPACT_CONVERSATION_TOOL_NAME); + expect(toolResults).toHaveLength(1); + + // The tool should have executed and returned a summary + // biome-ignore lint/suspicious/noExplicitAny: test typing + const toolResult = toolResults[0] as any; + 
expect(toolResult?.output).toBeDefined(); + // The output contains the summary from the compaction tool + expect(toolResult?.output?.summary).toBe(summaryText); + + // Now build the assistant message with the completed tool call + // biome-ignore lint/suspicious/noExplicitAny: test typing + const toolCall = toolCalls[0] as any; + const assistantMessage: Message = { + id: crypto.randomUUID(), + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + toolCallId: toolCall?.toolCallId ?? "tool-1", + state: "output-available", + // The input has model_intent wrapper, but we store the unwrapped version + input: { summary: summaryText }, + output: toolResult?.output, + } as Message["parts"][number], + ], + }; + + // Construct the full message history as it would be after the first turn + // Original messages + compaction request + assistant's tool call response + const messagesForSecondCall: Message[] = [ + ...originalMessages, + { + id: "compaction-request", + role: "user", + parts: [ + { + type: "text", + text: "[SYSTEM NOTICE - CONTEXT LIMIT] Please call compact_conversation tool NOW", + }, + ], + }, + // The assistant's response with the completed tool call + assistantMessage, + ]; + + // Clear logs before second call + infoLogs.length = 0; + + // Step 2: Second call - after compaction is applied, should NOT trigger another compaction + const secondParams = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: messagesForSecondCall, + model: newMockModel({ textResponse: "Continuing the conversation..." 
}), + compaction: { softThreshold, hardThreshold }, + }); + + // After applying compaction: + // - Original 10 messages + compaction request should be replaced by summary + // - Only summary message + tool call message remain + // - Token count should be much lower now + + // Verify NO new compaction was triggered + const secondCompactionRequest = secondParams.messages.find( + (m) => + m.role === "user" && + (typeof m.content === "string" + ? m.content.includes("CONTEXT LIMIT") + : Array.isArray(m.content) && + m.content.some( + (c) => + c.type === "text" && + (c as { text: string }).text.includes("CONTEXT LIMIT") + )) + ); + expect(secondCompactionRequest).toBeUndefined(); + + // Compaction tool should NOT be included since we're under threshold after applying summary + expect(secondParams.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); + + // Verify the summary message is present (compaction was applied) + const summaryMessage = secondParams.messages.find( + (m) => + m.role === "user" && + (typeof m.content === "string" + ? 
m.content.includes("CONVERSATION SUMMARY") + : Array.isArray(m.content) && + m.content.some( + (c) => + c.type === "text" && + (c as { text: string }).text.includes("CONVERSATION SUMMARY") + )) + ); + expect(summaryMessage).toBeDefined(); + + // No "approaching context limit" log should appear in second call + const contextLimitLog = infoLogs.find((l) => + l.includes("approaching context limit") + ); + expect(contextLimitLog).toBeUndefined(); + }); +}); diff --git a/packages/scout-agent/lib/core.ts b/packages/scout-agent/lib/core.ts index 9e334c7..b508037 100644 --- a/packages/scout-agent/lib/core.ts +++ b/packages/scout-agent/lib/core.ts @@ -6,6 +6,11 @@ import * as slack from "@blink-sdk/slack"; import type { App } from "@slack/bolt"; import { convertToModelMessages, type LanguageModel, type Tool } from "ai"; import type * as blink from "blink"; +import { + DEFAULT_HARD_TOKEN_THRESHOLD, + DEFAULT_SOFT_TOKEN_THRESHOLD, + processCompaction, +} from "./compaction"; import { type CoderApiClient, type CoderWorkspaceInfo, @@ -54,6 +59,29 @@ export interface BuildStreamTextParamsOptions { * If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config. */ getGithubAppContext?: () => Promise; + /** + * Configuration for conversation compaction. + * If not provided, compaction features are enabled with default thresholds. + * Set to `false` to disable compaction entirely. + */ + compaction?: + | { + /** + * Soft token threshold at which to trigger compaction. + * When the conversation exceeds this threshold, a message is injected + * asking the model to call the compact_conversation tool. + * Default: 180 000 tokens + */ + softThreshold?: number; + /** + * Hard token threshold - max tokens to send for compaction. + * Messages beyond this limit are preserved and restored after compaction. + * Must be greater than softThreshold. 
+ * Default: 190 000 tokens + */ + hardThreshold?: number; + } + | false; } interface Logger { @@ -326,6 +354,7 @@ export class Scout { tools: providedTools, getGithubAppContext, systemPrompt = defaultSystemPrompt, + compaction: compactionConfig, }: BuildStreamTextParamsOptions): Promise<{ model: LanguageModel; messages: ModelMessage[]; @@ -346,7 +375,28 @@ export class Scout { )() : undefined; - const slackMetadata = getSlackMetadata(messages); + // Process compaction if enabled + const compactionEnabled = compactionConfig !== false; + const softTokenThreshold = + (compactionConfig !== false + ? compactionConfig?.softThreshold + : undefined) ?? DEFAULT_SOFT_TOKEN_THRESHOLD; + const hardTokenThreshold = + (compactionConfig !== false + ? compactionConfig?.hardThreshold + : undefined) ?? DEFAULT_HARD_TOKEN_THRESHOLD; + + const { messages: compactedMessages, compactionTool } = compactionEnabled + ? await processCompaction({ + messages, + softTokenThreshold, + hardTokenThreshold, + model, + logger: this.logger, + }) + : { messages, compactionTool: {} }; + + const slackMetadata = getSlackMetadata(compactedMessages); const respondingInSlack = this.slack.app !== undefined && slackMetadata !== undefined; @@ -447,6 +497,7 @@ export class Scout { } const tools = { + ...compactionTool, ...(this.webSearch.config ? 
createWebSearchTools({ exaApiKey: this.webSearch.config.exaApiKey }) : {}), @@ -473,7 +524,7 @@ ${slack.formattingRules} `; } - const converted = convertToModelMessages(messages, { + const converted = convertToModelMessages(compactedMessages, { ignoreIncompleteToolCalls: true, tools, }); diff --git a/packages/scout-agent/lib/index.ts b/packages/scout-agent/lib/index.ts index 2b6d5a5..d60f62f 100644 --- a/packages/scout-agent/lib/index.ts +++ b/packages/scout-agent/lib/index.ts @@ -1,3 +1,4 @@ +export * from "./compaction"; export type { CoderApiClient, CoderWorkspaceInfo } from "./compute/coder/index"; export type { DaytonaClient, DaytonaSandbox } from "./compute/daytona/index"; export * from "./core"; diff --git a/packages/scout-agent/package.json b/packages/scout-agent/package.json index 67b087f..366919b 100644 --- a/packages/scout-agent/package.json +++ b/packages/scout-agent/package.json @@ -1,7 +1,7 @@ { "name": "@blink-sdk/scout-agent", "description": "A general-purpose AI agent with GitHub, Slack, web search, and compute capabilities built on Blink SDK.", - "version": "0.0.9", + "version": "0.0.10", "type": "module", "keywords": [ "blink", @@ -50,6 +50,7 @@ "@blink-sdk/multiplexer": "^0.0.1", "@blink-sdk/slack": "^1.1.2", "@octokit/webhooks": "^14.1.3", + "ai-tokenizer": "^1.0.6", "exa-js": "^2.0.3" }, "devDependencies": {