From e74aa623ac8d1b25e07c92af510ae1b6080c6f4d Mon Sep 17 00:00:00 2001 From: stainlu Date: Thu, 7 May 2026 20:05:03 +0800 Subject: [PATCH 1/2] perf: bound review context hydration --- src/clawsweeper.ts | 269 +++++++++++++++++++++++++++++++++++---- test/clawsweeper.test.ts | 54 ++++++++ 2 files changed, 301 insertions(+), 22 deletions(-) diff --git a/src/clawsweeper.ts b/src/clawsweeper.ts index a3f3d61c51..3c1b6b0475 100644 --- a/src/clawsweeper.ts +++ b/src/clawsweeper.ts @@ -306,12 +306,20 @@ interface ItemContext { pullReviewComments?: unknown[]; counts?: { comments: number; + commentsHydrated?: number; + commentsTruncated?: boolean; timeline: number; closingPullRequests?: number; relatedItems?: number; pullFiles?: number; + pullFilesHydrated?: number; + pullFilesTruncated?: boolean; pullCommits?: number; + pullCommitsHydrated?: number; + pullCommitsTruncated?: boolean; pullReviewComments?: number; + pullReviewCommentsHydrated?: number; + pullReviewCommentsTruncated?: boolean; }; } @@ -1511,15 +1519,38 @@ export function compactMappedSlice( items: readonly T[], limit: number, mapper: (item: T) => unknown, +): unknown[] { + return compactMappedWindow(items, items.length, limit, mapper); +} + +export function compactMappedWindow( + items: readonly T[], + total: number, + limit: number, + mapper: (item: T) => unknown, ): unknown[] { const boundedLimit = Math.max(0, Math.floor(limit)); - if (items.length <= boundedLimit) return items.map(mapper); + const boundedTotal = Math.max(0, Math.floor(total)); + if (boundedTotal <= boundedLimit && items.length <= boundedLimit) return items.map(mapper); + if (boundedLimit === 0) { + return boundedTotal > 0 + ? [{ omitted: boundedTotal, note: "middle entries omitted from prompt context" }] + : []; + } const keepStart = Math.floor(boundedLimit / 2); const keepEnd = Math.max(0, boundedLimit - keepStart); + const retained = + items.length > boundedLimit && boundedTotal === items.length + ? items + : items.slice(0, boundedLimit); + const retainedStart = retained.slice(0, keepStart); + const retainedEnd = + keepEnd > 0 ? retained.slice(Math.max(keepStart, retained.length - keepEnd)) : []; + const omitted = Math.max(0, boundedTotal - retainedStart.length - retainedEnd.length); return [ - ...items.slice(0, keepStart).map(mapper), - { omitted: items.length - boundedLimit, note: "middle entries omitted from prompt context" }, - ...(keepEnd > 0 ? items.slice(items.length - keepEnd).map(mapper) : []), + ...retainedStart.map(mapper), + ...(omitted > 0 ? [{ omitted, note: "middle entries omitted from prompt context" }] : []), + ...retainedEnd.map(mapper), ]; } @@ -2045,12 +2076,113 @@ export function githubPaginatedPath(path: string): string { return serialized ? `${base}?${serialized}` : base; } +function githubPagePath(path: string, page: number, perPage = 100): string { + const [basePart, query = ""] = path.split("?", 2); + const base = basePart ?? path; + const params = new URLSearchParams(query); + params.set("per_page", String(Math.max(1, Math.floor(perPage)))); + params.set("page", String(Math.max(1, Math.floor(page)))); + const serialized = params.toString(); + return serialized ? `${base}?${serialized}` : base; +} + function ghPaged(path: string): T[] { const pages = ghJson(["api", githubPaginatedPath(path), "--paginate", "--slurp"]); if (!Array.isArray(pages)) return []; return pages.flatMap((page) => (Array.isArray(page) ? (page as T[]) : [])); } +interface ContextHydration { + items: T[]; + total: number; + hydrated: number; + truncated: boolean; +} + +function ghPage(path: string, page: number): T[] { + const items = ghJson(["api", githubPagePath(path, page)]); + return Array.isArray(items) ? (items as T[]) : []; +} + +function githubCount(value: unknown): number | null { + const count = + typeof value === "number" ? value : typeof value === "string" ? Number(value) : Number.NaN; + if (!Number.isFinite(count) || count < 0) return null; + return Math.floor(count); +} + +interface GithubContextWindowPlan { + keepStart: number; + keepEnd: number; + tailFirstPageNumber: number; + lastPageNumber: number; + tailOffset: number; +} + +export function githubContextWindowPlan( + total: number, + promptLimit: number, + perPage = 100, +): GithubContextWindowPlan { + const boundedTotal = Math.max(0, Math.floor(total)); + const boundedLimit = Math.max(0, Math.floor(promptLimit)); + const boundedPerPage = Math.max(1, Math.floor(perPage)); + const keepStart = Math.floor(boundedLimit / 2); + const keepEnd = Math.max(0, boundedLimit - keepStart); + const tailStartIndex = Math.max(0, boundedTotal - keepEnd); + const tailFirstPageNumber = Math.floor(tailStartIndex / boundedPerPage) + 1; + return { + keepStart, + keepEnd, + tailFirstPageNumber, + lastPageNumber: Math.max(1, Math.ceil(boundedTotal / boundedPerPage)), + tailOffset: tailStartIndex - (tailFirstPageNumber - 1) * boundedPerPage, + }; +} + +function ghPagedContextWindow( + path: string, + totalCount: unknown, + promptLimit: number, +): ContextHydration { + const total = githubCount(totalCount); + const boundedLimit = Math.max(0, Math.floor(promptLimit)); + if (total === null) { + const items = ghPaged(path); + return { items, total: items.length, hydrated: items.length, truncated: false }; + } + if (total === 0 || boundedLimit === 0) { + return { items: [], total, hydrated: 0, truncated: total > 0 }; + } + if (total <= boundedLimit) { + const items = total <= 100 ? ghPage(path, 1) : ghPaged(path); + return { + items, + total: Math.max(total, items.length), + hydrated: items.length, + truncated: false, + }; + } + + const plan = githubContextWindowPlan(total, boundedLimit); + const firstPage = plan.keepStart > 0 ? ghPage(path, 1) : []; + const headItems = firstPage.slice(0, plan.keepStart); + const tailPages: T[] = []; + if (plan.keepEnd > 0) { + for (let page = plan.tailFirstPageNumber; page <= plan.lastPageNumber; page += 1) { + tailPages.push(...(page === 1 && plan.keepStart > 0 ? firstPage : ghPage(path, page))); + } + } + const tailItems = tailPages.slice(plan.tailOffset, plan.tailOffset + plan.keepEnd); + const items = [...headItems, ...tailItems]; + return { + items, + total, + hydrated: items.length, + truncated: total > items.length, + }; +} + function ensureDir(path: string): void { mkdirSync(path, { recursive: true }); } @@ -3161,14 +3293,22 @@ function planCandidates(options: { function collectItemContext(item: Item): ItemContext { const issue = ghJson(["api", `repos/${targetRepo()}/issues/${item.number}`]); - const comments = ghPaged(`repos/${targetRepo()}/issues/${item.number}/comments`); + const issueRecord = asRecord(issue); + const commentsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/issues/${item.number}/comments`, + issueRecord.comments, + 24, + ); + const comments = commentsWindow.items; const timeline = ghPaged(`repos/${targetRepo()}/issues/${item.number}/timeline`); const context: ItemContext = { issue: compactIssue(issue), - comments: compactMappedSlice(comments, 24, compactComment), + comments: compactMappedWindow(comments, commentsWindow.total, 24, compactComment), timeline: compactMappedSlice(timeline, 80, compactTimelineEvent), counts: { - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, timeline: timeline.length, }, }; @@ -3180,7 +3320,9 @@ function collectItemContext(item: Item): ItemContext { context.closingPullRequests = compactMappedSlice(closingPullRequests, 12, compactPullRequest); context.counts = { ...context.counts, - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, timeline: timeline.length, closingPullRequests: closingPullRequests.length, }; @@ -3188,20 +3330,54 @@ function collectItemContext(item: Item): ItemContext { } if (item.kind === "pull_request") { pullRequest = ghJson(["api", `repos/${targetRepo()}/pulls/${item.number}`]); - const pullFiles = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/files`); - const pullCommits = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/commits`); - pullReviewComments = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/comments`); + const pullRecord = asRecord(pullRequest); + const pullFilesWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/files`, + pullRecord.changed_files, + 80, + ); + const pullFiles = pullFilesWindow.items; + const pullCommitsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/commits`, + pullRecord.commits, + 80, + ); + const pullCommits = pullCommitsWindow.items; + const pullReviewCommentsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/comments`, + pullRecord.review_comments, + 40, + ); + pullReviewComments = pullReviewCommentsWindow.items; context.pullRequest = compactPullRequest(pullRequest); - context.pullFiles = compactMappedSlice(pullFiles, 80, compactPullFile); - context.pullCommits = compactMappedSlice(pullCommits, 80, compactPullCommit); - context.pullReviewComments = compactMappedSlice(pullReviewComments, 40, compactComment); + context.pullFiles = compactMappedWindow(pullFiles, pullFilesWindow.total, 80, compactPullFile); + context.pullCommits = compactMappedWindow( + pullCommits, + pullCommitsWindow.total, + 80, + compactPullCommit, + ); + context.pullReviewComments = compactMappedWindow( + pullReviewComments, + pullReviewCommentsWindow.total, + 40, + compactComment, + ); context.counts = { ...context.counts, - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, timeline: timeline.length, - pullFiles: pullFiles.length, - pullCommits: pullCommits.length, - pullReviewComments: pullReviewComments.length, + pullFiles: pullFilesWindow.total, + pullFilesHydrated: pullFilesWindow.hydrated, + pullFilesTruncated: pullFilesWindow.truncated, + pullCommits: pullCommitsWindow.total, + pullCommitsHydrated: pullCommitsWindow.hydrated, + pullCommitsTruncated: pullCommitsWindow.truncated, + pullReviewComments: pullReviewCommentsWindow.total, + pullReviewCommentsHydrated: pullReviewCommentsWindow.hydrated, + pullReviewCommentsTruncated: pullReviewCommentsWindow.truncated, }; } const relatedOptions: Parameters[0] = { @@ -3216,14 +3392,28 @@ function collectItemContext(item: Item): ItemContext { if (relatedItems.length) { context.relatedItems = relatedItems; const counts: NonNullable = { - comments: context.counts?.comments ?? comments.length, + comments: context.counts?.comments ?? commentsWindow.total, + commentsHydrated: context.counts?.commentsHydrated ?? commentsWindow.hydrated, + commentsTruncated: context.counts?.commentsTruncated ?? commentsWindow.truncated, timeline: context.counts?.timeline ?? timeline.length, relatedItems: relatedItems.length, }; if (context.counts?.pullFiles !== undefined) counts.pullFiles = context.counts.pullFiles; + if (context.counts?.pullFilesHydrated !== undefined) + counts.pullFilesHydrated = context.counts.pullFilesHydrated; + if (context.counts?.pullFilesTruncated !== undefined) + counts.pullFilesTruncated = context.counts.pullFilesTruncated; if (context.counts?.pullCommits !== undefined) counts.pullCommits = context.counts.pullCommits; + if (context.counts?.pullCommitsHydrated !== undefined) + counts.pullCommitsHydrated = context.counts.pullCommitsHydrated; + if (context.counts?.pullCommitsTruncated !== undefined) + counts.pullCommitsTruncated = context.counts.pullCommitsTruncated; if (context.counts?.pullReviewComments !== undefined) counts.pullReviewComments = context.counts.pullReviewComments; + if (context.counts?.pullReviewCommentsHydrated !== undefined) + counts.pullReviewCommentsHydrated = context.counts.pullReviewCommentsHydrated; + if (context.counts?.pullReviewCommentsTruncated !== undefined) + counts.pullReviewCommentsTruncated = context.counts.pullReviewCommentsTruncated; if (context.counts?.closingPullRequests !== undefined) counts.closingPullRequests = context.counts.closingPullRequests; context.counts = counts; @@ -4888,6 +5078,20 @@ function reviewTelemetryNumber(value: number | undefined): string { return String(Math.max(0, Math.round(value))); } +function contextCountText( + total: number | undefined, + fallback: number, + hydrated?: number, + truncated?: boolean, +): string { + const displayTotal = + total === undefined || !Number.isFinite(total) ? Math.max(0, fallback) : Math.max(0, total); + if (hydrated === undefined || !Number.isFinite(hydrated)) return String(displayTotal); + const displayHydrated = Math.max(0, Math.round(hydrated)); + if (!truncated && displayHydrated >= displayTotal) return String(displayTotal); + return `${displayTotal} (hydrated ${displayHydrated}${truncated ? ", truncated" : ""})`; +} + function runtimeReviewTextFromReport(markdown: string): string { return runtimeReviewText({ model: frontMatterValue(markdown, "review_model") ?? "", @@ -6075,11 +6279,32 @@ ${options.action.closeComment ? options.action.closeComment : "_No close comment ## GitHub Snapshot -- comments: ${options.context.counts?.comments ?? options.context.comments.length} +- comments: ${contextCountText( + options.context.counts?.comments, + options.context.comments.length, + options.context.counts?.commentsHydrated, + options.context.counts?.commentsTruncated, + )} - timeline events: ${options.context.counts?.timeline ?? options.context.timeline.length} - related items: ${options.context.counts?.relatedItems ?? options.context.relatedItems?.length ?? 0} -- PR files: ${options.context.counts?.pullFiles ?? options.context.pullFiles?.length ?? 0} -- PR commits: ${options.context.counts?.pullCommits ?? options.context.pullCommits?.length ?? 0} +- PR files: ${contextCountText( + options.context.counts?.pullFiles, + options.context.pullFiles?.length ?? 0, + options.context.counts?.pullFilesHydrated, + options.context.counts?.pullFilesTruncated, + )} +- PR commits: ${contextCountText( + options.context.counts?.pullCommits, + options.context.pullCommits?.length ?? 0, + options.context.counts?.pullCommitsHydrated, + options.context.counts?.pullCommitsTruncated, + )} +- PR review comments: ${contextCountText( + options.context.counts?.pullReviewComments, + options.context.pullReviewComments?.length ?? 0, + options.context.counts?.pullReviewCommentsHydrated, + options.context.counts?.pullReviewCommentsTruncated, + )} ## Review Telemetry diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index 6d3fa821bb..22c9911127 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -17,10 +17,12 @@ import { closeReasonsArg, closingPullRequestReferenceTarget, compactMappedSlice, + compactMappedWindow, codexEnv, dashboardClosedAt, fixedPullRequestFromCommitPullsForTest, formatRecentClosedRows, + githubContextWindowPlan, githubPaginatedPath, ghRetryKind, hotIntakeRecencyMs, @@ -222,6 +224,58 @@ test("compactMappedSlice maps every entry when no compaction is needed", () => { assert.deepEqual(mapped, [1, 2, 3]); }); +test("compactMappedWindow marks omitted entries when hydration is already bounded", () => { + const mapped: number[] = []; + const result = compactMappedWindow([1, 2, 5, 6], 6, 4, (value) => { + mapped.push(value); + return value * 10; + }); + assert.deepEqual(result, [ + 10, + 20, + { omitted: 2, note: "middle entries omitted from prompt context" }, + 50, + 60, + ]); + assert.deepEqual(mapped, [1, 2, 5, 6]); +}); + +test("compactMappedWindow keeps bounded hydrated context when total is larger than limit", () => { + const mapped: number[] = []; + const result = compactMappedWindow([1, 2, 99, 100], 100, 4, (value) => { + mapped.push(value); + return value; + }); + assert.deepEqual(result, [ + 1, + 2, + { omitted: 96, note: "middle entries omitted from prompt context" }, + 99, + 100, + ]); + assert.deepEqual(mapped, [1, 2, 99, 100]); +}); + +test("githubContextWindowPlan includes prior page when the tail crosses a page boundary", () => { + assert.deepEqual(githubContextWindowPlan(101, 80), { + keepStart: 40, + keepEnd: 40, + tailFirstPageNumber: 1, + lastPageNumber: 2, + tailOffset: 61, + }); +}); + +test("githubContextWindowPlan keeps large tails to the final page when possible", () => { + assert.deepEqual(githubContextWindowPlan(3000, 80), { + keepStart: 40, + keepEnd: 40, + tailFirstPageNumber: 30, + lastPageNumber: 30, + tailOffset: 60, + }); +}); + test("review prompt assets match tracked files", () => { assert.equal(reviewPromptTemplate(), readFileSync("prompts/review-item.md", "utf8")); assert.deepEqual( From 00b414fc16b0f15deb3349edc8eb1c5935020b5c Mon Sep 17 00:00:00 2001 From: stainlu Date: Sat, 9 May 2026 01:51:57 +0800 Subject: [PATCH 2/2] test: cover review context window hydration --- src/clawsweeper.ts | 18 ++++++++++----- test/clawsweeper.test.ts | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/clawsweeper.ts b/src/clawsweeper.ts index 3c1b6b0475..e9304a5062 100644 --- a/src/clawsweeper.ts +++ b/src/clawsweeper.ts @@ -2092,7 +2092,7 @@ function ghPaged(path: string): T[] { return pages.flatMap((page) => (Array.isArray(page) ? (page as T[]) : [])); } -interface ContextHydration { +export interface ContextHydration { items: T[]; total: number; hydrated: number; @@ -2140,22 +2140,28 @@ export function githubContextWindowPlan( }; } -function ghPagedContextWindow( +export function ghPagedContextWindow( path: string, totalCount: unknown, promptLimit: number, + fetchers: { + page?: (path: string, page: number) => T[]; + paged?: (path: string) => T[]; + } = {}, ): ContextHydration { + const fetchPage = fetchers.page ?? ghPage; + const fetchPaged = fetchers.paged ?? ghPaged; const total = githubCount(totalCount); const boundedLimit = Math.max(0, Math.floor(promptLimit)); if (total === null) { - const items = ghPaged(path); + const items = fetchPaged(path); return { items, total: items.length, hydrated: items.length, truncated: false }; } if (total === 0 || boundedLimit === 0) { return { items: [], total, hydrated: 0, truncated: total > 0 }; } if (total <= boundedLimit) { - const items = total <= 100 ? ghPage(path, 1) : ghPaged(path); + const items = total <= 100 ? fetchPage(path, 1) : fetchPaged(path); return { items, total: Math.max(total, items.length), @@ -2165,12 +2171,12 @@ function ghPagedContextWindow( } const plan = githubContextWindowPlan(total, boundedLimit); - const firstPage = plan.keepStart > 0 ? ghPage(path, 1) : []; + const firstPage = plan.keepStart > 0 ? fetchPage(path, 1) : []; const headItems = firstPage.slice(0, plan.keepStart); const tailPages: T[] = []; if (plan.keepEnd > 0) { for (let page = plan.tailFirstPageNumber; page <= plan.lastPageNumber; page += 1) { - tailPages.push(...(page === 1 && plan.keepStart > 0 ? firstPage : ghPage(path, page))); + tailPages.push(...(page === 1 && plan.keepStart > 0 ? firstPage : fetchPage(path, page))); } } const tailItems = tailPages.slice(plan.tailOffset, plan.tailOffset + plan.keepEnd); diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index 22c9911127..aa655f3e8d 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -23,6 +23,7 @@ import { fixedPullRequestFromCommitPullsForTest, formatRecentClosedRows, githubContextWindowPlan, + ghPagedContextWindow, githubPaginatedPath, ghRetryKind, hotIntakeRecencyMs, @@ -276,6 +277,53 @@ test("githubContextWindowPlan keeps large tails to the final page when possible" }); }); +test("ghPagedContextWindow reuses first page when tail overlaps the head page", () => { + const fetchedPages: number[] = []; + const window = ghPagedContextWindow( + "repos/openclaw/openclaw/issues/123/comments", + 101, + 80, + { + page: (_path, page) => { + fetchedPages.push(page); + const start = (page - 1) * 100 + 1; + const end = Math.min(page * 100, 101); + return Array.from({ length: end - start + 1 }, (_, index) => start + index); + }, + }, + ); + + assert.deepEqual(fetchedPages, [1, 2]); + assert.deepEqual(window.items, [ + ...Array.from({ length: 40 }, (_, index) => index + 1), + ...Array.from({ length: 40 }, (_, index) => index + 62), + ]); + assert.equal(window.total, 101); + assert.equal(window.hydrated, 80); + assert.equal(window.truncated, true); +}); + +test("ghPagedContextWindow falls back to full pagination when total is missing", () => { + const window = ghPagedContextWindow( + "repos/openclaw/openclaw/pulls/123/files", + undefined, + 80, + { + paged: () => [1, 2, 3], + page: () => { + throw new Error("page fetch should not be used without a total count"); + }, + }, + ); + + assert.deepEqual(window, { + items: [1, 2, 3], + total: 3, + hydrated: 3, + truncated: false, + }); +}); + test("review prompt assets match tracked files", () => { assert.equal(reviewPromptTemplate(), readFileSync("prompts/review-item.md", "utf8")); assert.deepEqual(