/**
 * Marker that {@link buildTextMetrics} looks for when the caller does not say
 * explicitly whether the text was truncated.
 */
export const TRUNCATION_MARKER = '...[truncated]';

/** Divisor for the rough, provider-neutral characters-per-token estimate. */
const CHARS_PER_TOKEN = 4;

/** Size metrics describing a piece of text as it is returned to the caller. */
export interface TextMetrics {
  /** Length of the returned text, measured with `String.prototype.length`. */
  returned_chars: number;
  /** Approximate token count of the returned text (see {@link estimateTokens}). */
  estimated_tokens: number;
  /** Whether the returned text was cut short. */
  truncated: boolean;
  /** Caller-supplied label for the output mode; omitted when not provided. */
  mode?: string;
}

/** {@link TextMetrics} extended with the size of the text before reduction. */
export interface RawTextMetrics extends TextMetrics {
  /** Length of the raw (pre-reduction) text. */
  raw_chars: number;
  /** Approximate token count of the raw text. */
  raw_estimated_tokens: number;
  /** `raw_chars / returned_chars`, rounded to three decimals (see {@link buildRawTextMetrics}). */
  compression_ratio: number;
}

/** Options accepted by the metrics builders. */
export interface TextMetricsOptions {
  /** Label copied into the result; an empty string is treated as "no mode". */
  mode?: string;
  /** Explicit truncation flag; when omitted the text is scanned for {@link TRUNCATION_MARKER}. */
  truncated?: boolean;
}

/**
 * Estimates how many tokens `text` occupies.
 *
 * Deliberately approximate and provider-neutral: the result is
 * `ceil(length / 4)`, which is why consumers expose it as
 * `estimated_tokens` rather than an exact token count.
 *
 * @param text - Text to measure.
 * @returns `0` for the empty string, otherwise the rounded-up estimate.
 */
export function estimateTokens(text: string): number {
  // Math.ceil(0 / 4) is already 0, so the empty string needs no special case.
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}

/**
 * Builds size metrics for text that is about to be returned.
 *
 * @param text - The returned text.
 * @param opts - Optional mode label and explicit truncation flag.
 * @returns Metrics for `text`; `mode` is only present when a non-empty mode was given.
 */
export function buildTextMetrics(text: string, opts?: TextMetricsOptions): TextMetrics {
  return {
    returned_chars: text.length,
    estimated_tokens: estimateTokens(text),
    // An explicit flag always wins (including `false`); otherwise fall back to marker detection.
    truncated: opts?.truncated ?? text.includes(TRUNCATION_MARKER),
    ...(opts?.mode ? { mode: opts.mode } : {}),
  };
}

/**
 * Computes the raw-to-returned size ratio in a JSON-safe way.
 *
 * Division by zero would yield `Infinity`/`NaN`, which `JSON.stringify`
 * turns into `null`; instead an empty result reports `1` when the raw text
 * was empty too (nothing changed) and `0` otherwise.
 */
function compressionRatio(rawChars: number, returnedChars: number): number {
  if (returnedChars > 0) {
    return Number((rawChars / returnedChars).toFixed(3));
  }
  return rawChars === 0 ? 1 : 0;
}

/**
 * Builds metrics comparing the raw text with the text actually returned.
 *
 * The returned-text portion is delegated to {@link buildTextMetrics} so that
 * truncation detection and `mode` handling cannot drift between the two
 * builders.
 *
 * @param rawText - Text before reduction.
 * @param returnedText - Text after reduction, as returned to the caller.
 * @param opts - Optional mode label and explicit truncation flag.
 * @returns Raw and returned sizes plus a finite `compression_ratio`.
 */
export function buildRawTextMetrics(
  rawText: string,
  returnedText: string,
  opts?: TextMetricsOptions,
): RawTextMetrics {
  const returned = buildTextMetrics(returnedText, opts);
  // Keys are listed explicitly to keep the serialized field order stable.
  return {
    raw_chars: rawText.length,
    raw_estimated_tokens: estimateTokens(rawText),
    returned_chars: returned.returned_chars,
    estimated_tokens: returned.estimated_tokens,
    compression_ratio: compressionRatio(rawText.length, returnedText.length),
    truncated: returned.truncated,
    ...(returned.mode !== undefined ? { mode: returned.mode } : {}),
  };
}

/**
 * Appends a machine-readable metrics footer to `text`.
 *
 * @param text - Body text to extend.
 * @param metrics - Object serialized with `JSON.stringify` after the `[openchrome_metrics]` tag.
 * @returns `text`, a blank line, then `[openchrome_metrics] <json>`.
 */
export function appendMetricsFooter(text: string, metrics: object): string {
  return `${text}\n\n[openchrome_metrics] ${JSON.stringify(metrics)}`;
}
Math.max(1, Math.min(10, Number(args.concurrency))) : 3; + const includeMetrics = args.include_metrics === true; const engineArg = args.engine as string | undefined; let engine: EngineMode = 'cdp'; if (engineArg === 'static' || engineArg === 'auto' || engineArg === 'cdp') { @@ -781,10 +786,26 @@ const handler: ToolHandler = async ( sitemap_source: sitemapSource, }; - const output = { summary, pages }; + const buildOutput = (outputPages: CrawledPage[]) => includeMetrics + ? { + summary: { + ...summary, + metrics: { + returned_chars: outputPages.reduce((sum, p) => sum + p.content.length, 0), + estimated_tokens: outputPages.reduce((sum, p) => sum + buildTextMetrics(p.content).estimated_tokens, 0), + truncated_pages: outputPages.filter((p) => p.content.includes('...[truncated]')).length, + mode: `crawl_sitemap:${outputFormat}`, + }, + }, + pages: outputPages.map((p) => ({ + ...p, + metrics: buildTextMetrics(p.content, { mode: outputFormat }), + })), + } + : { summary, pages: outputPages }; // Ensure output fits within limits - let outputJson = JSON.stringify(output, null, 2); + let outputJson = JSON.stringify(buildOutput(pages), null, 2); if (outputJson.length > MAX_OUTPUT_CHARS) { // Truncate page contents progressively to fit const truncatedPages = pages.map((p) => ({ @@ -794,7 +815,7 @@ const handler: ToolHandler = async ( ? 
p.content.slice(0, 2000) + '...[truncated]' : p.content, })); - outputJson = JSON.stringify({ summary, pages: truncatedPages }, null, 2); + outputJson = JSON.stringify(buildOutput(truncatedPages), null, 2); // If still too large, remove content entirely if (outputJson.length > MAX_OUTPUT_CHARS) { @@ -804,12 +825,41 @@ const handler: ToolHandler = async ( links_found: p.links_found, content_length: p.content.length, error: p.error, + ...(includeMetrics && { metrics: buildTextMetrics('', { mode: outputFormat, truncated: true }) }), })); + // Per-page metrics are computed from empty strings (content omitted), + // so the summary metrics must align with what is actually emitted — + // not the original full-content pages. + const emptyPageMetrics = buildTextMetrics('', { mode: outputFormat, truncated: true }); + const minimalSummary = includeMetrics + ? { + ...summary, + metrics: { + returned_chars: minimalPages.reduce( + (sum, p) => sum + (p.metrics?.returned_chars ?? 0), + 0, + ), + estimated_tokens: minimalPages.reduce( + (sum, p) => sum + (p.metrics?.estimated_tokens ?? 
emptyPageMetrics.estimated_tokens), + 0, + ), + truncated_pages: pages.length, + mode: `crawl_sitemap:${outputFormat}`, + }, + } + : summary; outputJson = JSON.stringify( - { summary, pages: minimalPages, note: 'Content omitted due to size constraints' }, + { summary: minimalSummary, pages: minimalPages, note: 'Content omitted due to size constraints' }, null, 2, ); + if (outputJson.length > MAX_OUTPUT_CHARS) { + outputJson = JSON.stringify({ + summary: minimalSummary, + pages: minimalPages.map(({ url, title, links_found, content_length, error }) => ({ url, title, links_found, content_length, error })), + note: 'Content omitted due to size constraints', + }, null, 2); + } } } diff --git a/src/tools/crawl.ts b/src/tools/crawl.ts index 7e4fbce56..52276497d 100644 --- a/src/tools/crawl.ts +++ b/src/tools/crawl.ts @@ -29,6 +29,7 @@ import { StaticFetchError, StaticReason, } from '../utils/static-fetch'; +import { buildTextMetrics } from '../core/metrics/token-estimate'; import { buildUrlScoreOptions, scoreUrl, UrlScoreOptions } from '../core/crawl/url-scorer'; import { extractMainContent, toMarkdown } from '../core/extract/html-to-markdown'; import { sanitizeContent } from '../security/content-sanitizer'; @@ -100,6 +101,10 @@ const definition: MCPToolDefinition = { description: 'Fetch engine: "cdp" (default, opens a Chrome tab per page), "static" (Node fetch only, fails closed on insufficient pages), or "auto" (static first, fall back to CDP when static is insufficient).', }, + include_metrics: { + type: 'boolean', + description: 'When true, include approximate output size/token metrics in the JSON result. Default: false.', + }, strategy: { type: 'string', enum: ['bfs', 'best_first'], @@ -239,7 +244,6 @@ async function fetchRobotsTxt( // the caller (auto mode) can fall back to CDP. 
// --------------------------------------------------------------------------- - function cleanMarkdownFromHtml( html: string, cleanOpts: { onlyMainContent: boolean; includeLinks: boolean }, @@ -355,7 +359,9 @@ async function fetchPageStatic( /** Options for `fetchOnePage`, shared by legacy crawl and host-driven crawl jobs. */ export interface FetchOnePageOptions { outputFormat: string; + /** When true (default), strip nav/footer/ads from extracted content. */ onlyMainContent?: boolean; + /** When true, include outgoing links in the result for BFS expansion. */ includeLinks?: boolean; } @@ -614,6 +620,7 @@ const handler: ToolHandler = async ( const delayMs = args.delay_ms != null ? Number(args.delay_ms) : 1000; const concurrency = args.concurrency != null ? Math.max(1, Math.min(10, Number(args.concurrency))) : 3; + const includeMetrics = args.include_metrics === true; const engineArg = args.engine as string | undefined; let engine: EngineMode = 'cdp'; if (engineArg === 'static' || engineArg === 'auto' || engineArg === 'cdp') { @@ -961,10 +968,26 @@ const handler: ToolHandler = async ( ...(adaptiveDispatcher ? { dispatcher: adaptiveDispatcher.stats() } : {}), }; - const output = { summary, pages }; + const buildOutput = (outputPages: CrawledPage[]) => includeMetrics + ? 
{ + summary: { + ...summary, + metrics: { + returned_chars: outputPages.reduce((sum, p) => sum + p.content.length, 0), + estimated_tokens: outputPages.reduce((sum, p) => sum + buildTextMetrics(p.content).estimated_tokens, 0), + truncated_pages: outputPages.filter((p) => p.content.includes('...[truncated]')).length, + mode: `crawl:${outputFormat}`, + }, + }, + pages: outputPages.map((p) => ({ + ...p, + metrics: buildTextMetrics(p.content, { mode: outputFormat }), + })), + } + : { summary, pages: outputPages }; // Ensure output fits within limits - let outputJson = JSON.stringify(output, null, 2); + let outputJson = JSON.stringify(buildOutput(pages), null, 2); if (outputJson.length > MAX_OUTPUT_CHARS) { // Truncate page contents progressively to fit const truncatedPages = pages.map((p) => ({ @@ -973,7 +996,7 @@ const handler: ToolHandler = async ( ? p.content.slice(0, 2000) + '...[truncated]' : p.content, })); - outputJson = JSON.stringify({ summary, pages: truncatedPages }, null, 2); + outputJson = JSON.stringify(buildOutput(truncatedPages), null, 2); // If still too large, remove content entirely if (outputJson.length > MAX_OUTPUT_CHARS) { @@ -985,7 +1008,37 @@ const handler: ToolHandler = async ( content_length: p.content.length, error: p.error, })); - outputJson = JSON.stringify({ summary, pages: minimalPages, note: 'Content omitted due to size constraints' }, null, 2); + const minimalOutput = includeMetrics + ? 
{ + summary: { + ...summary, + metrics: { + returned_chars: 0, + estimated_tokens: 0, + truncated_pages: minimalPages.length, + mode: `crawl:${outputFormat}`, + }, + }, + pages: minimalPages.map((p) => ({ + ...p, + metrics: buildTextMetrics('', { mode: outputFormat, truncated: true }), + })), + note: 'Content omitted due to size constraints', + } + : { summary, pages: minimalPages, note: 'Content omitted due to size constraints' }; + outputJson = JSON.stringify(minimalOutput, null, 2); + if (outputJson.length > MAX_OUTPUT_CHARS) { + outputJson = JSON.stringify({ + summary: includeMetrics + ? { + ...summary, + metrics: { returned_chars: 0, estimated_tokens: 0, truncated_pages: pages.length, mode: `crawl:${outputFormat}` }, + } + : summary, + pages: minimalPages.map(({ url, title, depth, links_found, content_length, error }) => ({ url, title, depth, links_found, content_length, error })), + note: 'Content omitted due to size constraints', + }, null, 2); + } } } diff --git a/src/tools/inspect.ts b/src/tools/inspect.ts index af817d106..a7adca435 100644 --- a/src/tools/inspect.ts +++ b/src/tools/inspect.ts @@ -14,6 +14,7 @@ import { TOOL_ANNOTATIONS } from '../types/tool-annotations'; import { getSessionManager } from '../session-manager'; import { withTimeout } from '../utils/with-timeout'; import { getAllShadowRoots, querySelectorInShadowRoots } from '../utils/shadow-dom'; +import { appendMetricsFooter, buildTextMetrics } from '../core/metrics/token-estimate'; import { prependHeaderText } from './_shared/state-header'; import { formatNodeRefToken, @@ -40,6 +41,10 @@ const definition: MCPToolDefinition = { enum: ['interactive', 'all', 'visible'], description: 'Element scope. Default: visible', }, + include_metrics: { + type: 'boolean', + description: 'When true, append approximate returned size/token metrics to text output. 
Default: false.', + }, }, required: ['tabId', 'query'], }, @@ -108,6 +113,7 @@ const handler: ToolHandler = async ( const tabId = args.tabId as string; const query = args.query as string; const scope = (args.scope as string) || 'visible'; + const includeMetrics = args.include_metrics === true; const sessionManager = getSessionManager(); @@ -578,10 +584,15 @@ const handler: ToolHandler = async ( // Footer with page context (always included) lines.push(`[Page] ${inspectResult.url} | "${inspectResult.title}"`); - const inspectPayload = lines.join('\n'); + const headeredText = prependHeaderText({ url: inspectResult.url, title: inspectResult.title, mode: 'inspect', capturedAt: Date.now(), tabId }, inspectPayload); return { - content: [{ type: 'text', text: prependHeaderText({ url: inspectResult.url, title: inspectResult.title, mode: 'inspect', capturedAt: Date.now(), tabId }, inspectPayload) }], + content: [{ + type: 'text', + text: includeMetrics + ? appendMetricsFooter(headeredText, buildTextMetrics(headeredText, { mode: `inspect:${scope}` })) + : headeredText, + }], }; } catch (error) { return { diff --git a/src/tools/read-page.ts b/src/tools/read-page.ts index dc73f93e2..1610c24e6 100644 --- a/src/tools/read-page.ts +++ b/src/tools/read-page.ts @@ -13,6 +13,7 @@ import { MAX_OUTPUT_CHARS } from '../config/defaults'; import { withTimeout } from '../utils/with-timeout'; import { SnapshotStore } from '../compression/snapshot-store'; import { sanitizeContent } from '../security/content-sanitizer'; +import { appendMetricsFooter, buildTextMetrics } from '../core/metrics/token-estimate'; import { getGlobalConfig } from '../config/global'; import { extractMainContent, toMarkdown } from '../core/extract/html-to-markdown'; import { getCurrentLoaderId, mintNodeRefSync } from '../core/perception/node-ref'; @@ -149,6 +150,10 @@ const definition: MCPToolDefinition = { type: 'boolean', description: 'Include structured read_page timing diagnostics in the MCP result metadata. 
Default: false.', }, + include_metrics: { + type: 'boolean', + description: 'When true, include approximate returned size/token metrics in the emitted payload. Default: false.', + }, }, required: ['tabId'], }, @@ -277,6 +282,42 @@ const handler: ToolHandler = async ( const withDiagnostics = (result: MCPResult): MCPResult => ( diagnosticsEnabled ? { ...result, _diagnostics: diagnostics } : result ); + const includeMetrics = args.include_metrics === true; + const withTextMetrics = (text: string, emittedMode: string, truncated = hasTruncationMarker(text)): string => { + if (!includeMetrics) return text; + let baseText = text; + let metrics = buildTextMetrics(baseText, { mode: emittedMode, truncated }); + for (let i = 0; i < 8; i++) { + const candidate = appendMetricsFooter(baseText, metrics); + const nextMetrics = buildTextMetrics(candidate, { mode: emittedMode, truncated }); + if (nextMetrics.returned_chars === metrics.returned_chars && nextMetrics.estimated_tokens === metrics.estimated_tokens) { + if (candidate.length <= MAX_OUTPUT_CHARS) return candidate; + const reserve = Math.min(512, Math.max(128, candidate.length - baseText.length + 64)); + baseText = `${baseText.slice(0, Math.max(0, MAX_OUTPUT_CHARS - reserve))} + +[Output truncated — metrics footer reserved output budget]`; + truncated = true; + metrics = buildTextMetrics(baseText, { mode: emittedMode, truncated }); + continue; + } + metrics = nextMetrics; + } + return appendMetricsFooter(baseText, metrics); + }; + const withSemanticMetrics = (view: Record): string => { + if (!includeMetrics) return JSON.stringify(view); + const payload: Record = { ...view }; + let metrics = buildTextMetrics(JSON.stringify(payload), { mode: 'semantic' }); + for (let i = 0; i < 8; i++) { + payload._metrics = metrics; + const text = JSON.stringify(payload); + const nextMetrics = buildTextMetrics(text, { mode: 'semantic' }); + if (nextMetrics.returned_chars === metrics.returned_chars && nextMetrics.estimated_tokens === 
metrics.estimated_tokens) return text; + metrics = nextMetrics; + } + payload._metrics = metrics; + return JSON.stringify(payload); + }; const axOverflowFallback = (args.fallback as string | undefined) || 'none'; const compactAX = args.compact === true; @@ -325,7 +366,7 @@ const handler: ToolHandler = async ( } const suffix = truncated ? '\n\n[Output truncated — exceeded MAX_OUTPUT_CHARS]' : ''; return { - content: [{ type: 'text', text: md + suffix }], + content: [{ type: 'text', text: withTextMetrics(md + suffix, 'markdown', truncated) }], }; } @@ -487,7 +528,7 @@ const handler: ToolHandler = async ( const includePagination = args.includePagination !== false; const cssPaginationSection = includePagination ? formatPaginationSection(await detectPagination(page, tabId)) : ''; return { - content: [{ type: 'text', text: cssText + cssPaginationSection }], + content: [{ type: 'text', text: withTextMetrics(cssText + cssPaginationSection, 'css') }], }; } @@ -680,7 +721,7 @@ const handler: ToolHandler = async ( ); return { - content: [{ type: 'text', text: JSON.stringify(view) }], + content: [{ type: 'text', text: withSemanticMetrics(view as unknown as Record) }], }; } @@ -730,7 +771,7 @@ const handler: ToolHandler = async ( const domPaginationSection = includePaginationDom ? await measure('paginationMs', async () => formatPaginationSection(await detectPagination(page, tabId))) : ''; const compressedText = statsLine + delta.content + nodeRefsBlock + domPaginationSection; return withDiagnostics({ - content: [{ type: 'text', text: compressedText }], + content: [{ type: 'text', text: withTextMetrics(compressedText, 'dom') }], _compression: { level: 'delta', originalChars: outputText.length, @@ -748,7 +789,7 @@ const handler: ToolHandler = async ( const includePaginationDom = args.includePagination !== false; const domPaginationSection = includePaginationDom ? 
/**
 * Reports whether `text` already carries one of the known truncation notices.
 *
 * Three substrings are recognised: the `...[truncated]` content suffix, the
 * `[Output truncated` notice prefix, and the
 * `Content omitted due to size constraints` note. Matching is a plain,
 * case-sensitive substring search.
 *
 * @param text - Output text to inspect.
 * @returns `true` when at least one notice occurs anywhere in `text`.
 */
function hasTruncationMarker(text: string): boolean {
  const notices = [
    '...[truncated]',
    '[Output truncated',
    'Content omitted due to size constraints',
  ];
  return notices.some((notice) => text.includes(notice));
}
DEFAULT_BODY_SAMPLE, 0), MAX_BODY_SAMPLE, ); + const includeMetrics = args.include_metrics === true; if (!rawUrl) { return { @@ -351,6 +357,12 @@ const handler: ToolHandler = async ( authRedirectHost: authRedirect.host, }), ...(navError && { error: navError }), + ...(includeMetrics && { + metrics: { + summary: buildTextMetrics(summaryLine, { mode: 'validate_page:summary' }), + bodyTextSample: buildTextMetrics(summary.bodyTextSample || '', { mode: 'validate_page:bodyTextSample' }), + }, + }), }; }; diff --git a/tests/core/metrics/token-estimate.test.ts b/tests/core/metrics/token-estimate.test.ts new file mode 100644 index 000000000..bca941a90 --- /dev/null +++ b/tests/core/metrics/token-estimate.test.ts @@ -0,0 +1,46 @@ +import { appendMetricsFooter, buildRawTextMetrics, buildTextMetrics, estimateTokens } from '../../../src/core/metrics/token-estimate'; + +describe('token metrics helpers', () => { + test('estimates empty and ASCII text without provider-specific claims', () => { + expect(estimateTokens('')).toBe(0); + expect(estimateTokens('abcdefghijkl')).toBe(3); + expect(estimateTokens('abcdefghijklm')).toBe(4); + }); + + test('uses a JSON-safe compression ratio for empty returned text', () => { + const metrics = buildRawTextMetrics('raw', ''); + expect(Number.isFinite(metrics.compression_ratio)).toBe(true); + expect(JSON.parse(JSON.stringify(metrics)).compression_ratio).toBe(0); + }); + + test('handles CJK and large strings deterministically', () => { + expect(estimateTokens('한국어문장')).toBe(Math.ceil('한국어문장'.length / 4)); + expect(estimateTokens('x'.repeat(10_001))).toBe(2501); + }); + + test('builds returned text metrics', () => { + expect(buildTextMetrics('hello world', { mode: 'dom' })).toEqual({ + returned_chars: 11, + estimated_tokens: 3, + truncated: false, + mode: 'dom', + }); + }); + + test('builds raw-vs-returned compression metrics', () => { + const metrics = buildRawTextMetrics('x'.repeat(100), 'x'.repeat(20), { mode: 'crawl' }); + 
expect(metrics).toMatchObject({ + raw_chars: 100, + returned_chars: 20, + raw_estimated_tokens: 25, + estimated_tokens: 5, + compression_ratio: 5, + truncated: false, + mode: 'crawl', + }); + }); + + test('appends a machine-readable metrics footer', () => { + expect(appendMetricsFooter('body', { returned_chars: 4 })).toBe('body\n\n[openchrome_metrics] {"returned_chars":4}'); + }); +}); diff --git a/tests/core/tools/crawl.engine.test.ts b/tests/core/tools/crawl.engine.test.ts index dfa54de31..afb6f46c9 100644 --- a/tests/core/tools/crawl.engine.test.ts +++ b/tests/core/tools/crawl.engine.test.ts @@ -193,25 +193,27 @@ describe('crawl engine=static', () => { }); - test('dispatcher=adaptive includes dispatcher stats without changing fixed default', async () => { + test('include_metrics adds summary and per-page token estimates without changing default', async () => { const handler = await loadHandler('crawl'); - const adaptive = await handler('s-adaptive', { + const withMetrics = await handler('s-metrics', { url: `${server.origin}/index.html`, max_pages: 1, max_depth: 0, delay_ms: 0, engine: 'static', respect_robots: false, - dispatcher: 'adaptive', - dispatcher_options: { min_concurrency: 1, max_concurrency: 3 }, + include_metrics: true, }); - const parsedAdaptive = parseResult(adaptive); - expect(parsedAdaptive.summary.dispatcher).toMatchObject({ - mode: 'adaptive', - min_concurrency: 1, + const parsedWithMetrics = parseResult(withMetrics); + const summaryMetrics = parsedWithMetrics.summary.metrics as Record; + expect(summaryMetrics.returned_chars).toBeGreaterThan(0); + expect(summaryMetrics.estimated_tokens).toBeGreaterThan(0); + expect(parsedWithMetrics.pages[0].metrics).toMatchObject({ + mode: 'markdown', + truncated: false, }); - const fixed = await handler('s-fixed', { + const withoutMetrics = await handler('s-metrics-default', { url: `${server.origin}/index.html`, max_pages: 1, max_depth: 0, @@ -219,8 +221,9 @@ describe('crawl engine=static', () => { engine: 
'static', respect_robots: false, }); - const parsedFixed = parseResult(fixed); - expect(parsedFixed.summary.dispatcher).toBeUndefined(); + const parsedWithoutMetrics = parseResult(withoutMetrics); + expect(parsedWithoutMetrics.summary.metrics).toBeUndefined(); + expect(parsedWithoutMetrics.pages[0].metrics).toBeUndefined(); }); test('respect_robots:true does not open a Chrome tab for robots.txt', async () => { diff --git a/tests/tools/inspect-metrics.test.ts b/tests/tools/inspect-metrics.test.ts new file mode 100644 index 000000000..7ba8a991c --- /dev/null +++ b/tests/tools/inspect-metrics.test.ts @@ -0,0 +1,96 @@ +/// + +import { createMockSessionManager } from '../utils/mock-session'; + +jest.mock('../../src/session-manager', () => ({ + getSessionManager: jest.fn(), +})); + +jest.mock('../../src/utils/shadow-dom', () => ({ + getAllShadowRoots: jest.fn().mockResolvedValue({ shadowRoots: [], domTree: {} }), + querySelectorInShadowRoots: jest.fn().mockResolvedValue([]), +})); + +import { getSessionManager } from '../../src/session-manager'; + +describe('InspectTool include_metrics', () => { + test('keeps default inspect output unchanged without metrics', async () => { + const mockSessionManager = createMockSessionManager(); + (getSessionManager as jest.Mock).mockReturnValue(mockSessionManager); + + const sessionId = 'inspect-default-metrics-session'; + const { targetId, page } = await mockSessionManager.createTarget(sessionId, 'about:blank'); + (page.evaluate as jest.Mock).mockResolvedValue({ + focusedInfo: null, + tabs: [], + interactiveCounts: { button: 2 }, + formFields: [], + headings: [], + errors: [], + visiblePanels: [], + url: 'https://example.com', + title: 'Example', + }); + + const { registerInspectTool } = await import('../../src/tools/inspect'); + const tools = new Map) => Promise }>(); + registerInspectTool({ + registerTool: (name: string, handler: unknown) => { + tools.set(name, { handler: handler as (sessionId: string, args: Record) => Promise }); + 
}, + } as unknown as Parameters[0]); + + const result = await tools.get('inspect')!.handler(sessionId, { + tabId: targetId, + query: 'interactive controls', + }); + + expect(result.content[0].text).toContain('[Interactive Elements] 2 buttons'); + expect(result.content[0].text).not.toContain('[openchrome_metrics]'); + }); + + test('appends approximate token metrics only when requested', async () => { + const mockSessionManager = createMockSessionManager(); + (getSessionManager as jest.Mock).mockReturnValue(mockSessionManager); + + const sessionId = 'inspect-include-metrics-session'; + const { targetId, page } = await mockSessionManager.createTarget(sessionId, 'about:blank'); + (page.evaluate as jest.Mock).mockResolvedValue({ + focusedInfo: null, + tabs: [], + interactiveCounts: { button: 1, link: 3 }, + formFields: [], + headings: [{ level: 1, text: 'Visible Heading' }], + errors: [], + visiblePanels: [], + url: 'https://example.com/repo', + title: 'Repository', + }); + + const { registerInspectTool } = await import('../../src/tools/inspect'); + const tools = new Map) => Promise }>(); + registerInspectTool({ + registerTool: (name: string, handler: unknown) => { + tools.set(name, { handler: handler as (sessionId: string, args: Record) => Promise }); + }, + } as unknown as Parameters[0]); + + const result = await tools.get('inspect')!.handler(sessionId, { + tabId: targetId, + query: 'headings and interactive controls', + include_metrics: true, + }); + const text = result.content[0].text as string; + const [body, metricsLine] = text.split('\n\n[openchrome_metrics] '); + const metrics = JSON.parse(metricsLine); + + expect(body).toContain('[Headings] h1: "Visible Heading"'); + expect(body).toContain('[Interactive Elements] 1 buttons, 3 links'); + expect(metrics).toEqual({ + returned_chars: body.length, + estimated_tokens: Math.ceil(body.length / 4), + truncated: false, + mode: 'inspect:visible', + }); + }); +}); diff --git a/tests/tools/read-page.test.ts 
b/tests/tools/read-page.test.ts index bc1c232ea..c9e4fd1ff 100644 --- a/tests/tools/read-page.test.ts +++ b/tests/tools/read-page.test.ts @@ -124,6 +124,21 @@ describe('ReadPageTool', () => { }); describe('Accessibility Tree', () => { + test('semantic include_metrics reports the final serialized payload size', async () => { + const handler = await getReadPageHandler(); + + const result = await handler(testSessionId, { + tabId: testTargetId, + mode: 'semantic', + include_metrics: true, + }) as { content: Array<{ type: string; text: string }> }; + + const text = result.content[0].text; + const payload = JSON.parse(text) as { _metrics: { returned_chars: number; estimated_tokens: number } }; + expect(payload._metrics.returned_chars).toBe(text.length); + expect(payload._metrics.estimated_tokens).toBe(Math.ceil(text.length / 4)); + }); + test('returns tree with default depth', async () => { const handler = await getReadPageHandler();