shaun0927 · shaun0927 · May 13, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/src/core/metrics/token-estimate.ts b/src/core/metrics/token-estimate.ts
@@ -0,0 +1,52 @@
+export interface TextMetrics { // Size metrics describing one piece of returned text.
+  returned_chars: number; // `length` of the returned text.
+  estimated_tokens: number; // Approximate token count produced by `estimateTokens` (heuristic, not exact).
+  truncated: boolean; // True when truncation was flagged explicitly or the '...[truncated]' marker was found.
+  mode?: string; // Optional caller-supplied mode label; left out when none was given.
+}
+
+export interface RawTextMetrics extends TextMetrics { // TextMetrics plus figures for the raw text it is compared against.
+  raw_chars: number; // `length` of the raw text.
+  raw_estimated_tokens: number; // Approximate token count of the raw text (same heuristic as `estimated_tokens`).
+  compression_ratio: number; // raw_chars / returned_chars, rounded to 3 decimals; 1 or 0 when nothing was returned.
+}
+
+export function estimateTokens(text: string): number { // Rough token estimate for `text`; the empty string yields 0.
+  if (text.length === 0) return 0;
+  // Deliberately approximate and provider-neutral: one token per four characters,
+  // rounded up. That is why the field is named `estimated_tokens`, not exact tokens.
+  return Math.ceil(text.length / 4);
+}
+
+export function buildTextMetrics(text: string, opts?: { mode?: string; truncated?: boolean }): TextMetrics { // Builds TextMetrics for `text`.
+  return {
+    returned_chars: text.length,
+    estimated_tokens: estimateTokens(text),
+    truncated: opts?.truncated ?? text.includes('...[truncated]'), // Explicit flag wins; otherwise the marker anywhere in the text counts.
+    ...(opts?.mode ? { mode: opts.mode } : {}), // `mode` is emitted only when a truthy value was supplied.
+  };
+}
+
+export function buildRawTextMetrics( // Builds metrics that compare a raw text with the text actually returned.
+  rawText: string, // Supplies raw_chars and raw_estimated_tokens.
+  returnedText: string, // Supplies returned_chars, estimated_tokens and the truncation check.
+  opts?: { mode?: string; truncated?: boolean }, // Optional mode label and explicit truncation flag.
+): RawTextMetrics {
+  const rawTokens = estimateTokens(rawText);
+  const returnedTokens = estimateTokens(returnedText);
+  return {
+    raw_chars: rawText.length,
+    raw_estimated_tokens: rawTokens,
+    returned_chars: returnedText.length,
+    estimated_tokens: returnedTokens,
+    compression_ratio: returnedText.length > 0
+      ? Number((rawText.length / returnedText.length).toFixed(3)) // Raw-to-returned length ratio, rounded to 3 decimals.
+      : rawText.length === 0 ? 1 : 0, // Nothing returned: 1 when the raw text is empty too, otherwise 0 (avoids dividing by zero).
+    truncated: opts?.truncated ?? returnedText.includes('...[truncated]'), // Explicit flag wins; otherwise look for the marker in the returned text.
+    ...(opts?.mode ? { mode: opts.mode } : {}), // `mode` is emitted only when a truthy value was supplied.
+  };
+}
+
+export function appendMetricsFooter(text: string, metrics: object): string { // Returns `text` followed by a blank line and an [openchrome_metrics] footer carrying `metrics` as JSON.
+  return `${text}\n\n[openchrome_metrics] ${JSON.stringify(metrics)}`;
+}
diff --git a/src/tools/crawl-sitemap.ts b/src/tools/crawl-sitemap.ts
@@ -27,6 +27,7 @@ import {
   StaticFetchError,
   StaticReason,
 } from '../utils/static-fetch';
+import { buildTextMetrics } from '../core/metrics/token-estimate';
 import { extractMainContent, toMarkdown } from '../core/extract/html-to-markdown';
 import { sanitizeContent } from '../security/content-sanitizer';
 import { getGlobalConfig } from '../config/global';
@@ -77,6 +78,10 @@ const definition: MCPToolDefinition = {
         description:
           'Fetch engine: "cdp" (default, opens a Chrome tab per page), "static" (Node fetch only, fails closed on insufficient pages), or "auto" (static first, fall back to CDP when static is insufficient).',
       },
+      include_metrics: {
+        type: 'boolean',
+        description: 'When true, include approximate output size/token metrics in the JSON result. Default: false.',
+      },
     },
     required: ['url'],
   },
@@ -263,7 +268,6 @@ async function resolveSitemapPageUrls(
 // the caller (auto mode) can fall back to CDP.
 // ---------------------------------------------------------------------------
 
-
 function cleanMarkdownFromHtml(
   html: string,
   cleanOpts: { onlyMainContent: boolean; includeLinks: boolean },
@@ -581,6 +585,7 @@ const handler: ToolHandler = async (
   };
   const concurrency = args.concurrency != null ? Math.max(1, Math.min(10, Number(args.concurrency))) : 3;
 
+  const includeMetrics = args.include_metrics === true;
   const engineArg = args.engine as string | undefined;
   let engine: EngineMode = 'cdp';
   if (engineArg === 'static' || engineArg === 'auto' || engineArg === 'cdp') {
@@ -781,10 +786,26 @@ const handler: ToolHandler = async (
       sitemap_source: sitemapSource,
     };
 
-    const output = { summary, pages };
+    const buildOutput = (outputPages: CrawledPage[]) => includeMetrics // Assembles the result object for the given pages; metrics are attached only when includeMetrics is true.
+      ? {
+          summary: {
+            ...summary,
+            metrics: { // Aggregates computed over the pages passed in, not over the serialized JSON.
+              returned_chars: outputPages.reduce((sum, p) => sum + p.content.length, 0),
+              estimated_tokens: outputPages.reduce((sum, p) => sum + buildTextMetrics(p.content).estimated_tokens, 0),
+              truncated_pages: outputPages.filter((p) => p.content.includes('...[truncated]')).length, // Pages whose content contains the truncation marker.
+              mode: `crawl_sitemap:${outputFormat}`,
+            },
+          },
+          pages: outputPages.map((p) => ({
+            ...p,
+            metrics: buildTextMetrics(p.content, { mode: outputFormat }), // Per-page metrics derived from that page's content.
+          })),
+        }
+      : { summary, pages: outputPages }; // Metrics disabled: plain summary plus pages.
 
     // Ensure output fits within limits
-    let outputJson = JSON.stringify(output, null, 2);
+    let outputJson = JSON.stringify(buildOutput(pages), null, 2);
     if (outputJson.length > MAX_OUTPUT_CHARS) {
       // Truncate page contents progressively to fit
       const truncatedPages = pages.map((p) => ({
@@ -794,7 +815,7 @@ const handler: ToolHandler = async (
             ? p.content.slice(0, 2000) + '...[truncated]'
             : p.content,
       }));
-      outputJson = JSON.stringify({ summary, pages: truncatedPages }, null, 2);
+      outputJson = JSON.stringify(buildOutput(truncatedPages), null, 2);
 
       // If still too large, remove content entirely
       if (outputJson.length > MAX_OUTPUT_CHARS) {
@@ -804,12 +825,41 @@ const handler: ToolHandler = async (
           links_found: p.links_found,
           content_length: p.content.length,
           error: p.error,
+          ...(includeMetrics && { metrics: buildTextMetrics('', { mode: outputFormat, truncated: true }) }),
         }));
+        // Per-page metrics are computed from empty strings (content omitted),
+        // so the summary metrics must align with what is actually emitted —
+        // not the original full-content pages.
+        const emptyPageMetrics = buildTextMetrics('', { mode: outputFormat, truncated: true });
+        const minimalSummary = includeMetrics
+          ? {
+              ...summary,
+              metrics: {
+                returned_chars: minimalPages.reduce(
+                  (sum, p) => sum + (p.metrics?.returned_chars ?? 0),
+                  0,
+                ),
+                estimated_tokens: minimalPages.reduce(
+                  (sum, p) => sum + (p.metrics?.estimated_tokens ?? emptyPageMetrics.estimated_tokens),
+                  0,
+                ),
+                truncated_pages: pages.length,
+                mode: `crawl_sitemap:${outputFormat}`,
+              },
+            }
+          : summary;
         outputJson = JSON.stringify(
-          { summary, pages: minimalPages, note: 'Content omitted due to size constraints' },
+          { summary: minimalSummary, pages: minimalPages, note: 'Content omitted due to size constraints' },
           null,
           2,
         );
+        if (outputJson.length > MAX_OUTPUT_CHARS) {
+          outputJson = JSON.stringify({
+            summary: minimalSummary,
+            pages: minimalPages.map(({ url, title, links_found, content_length, error }) => ({ url, title, links_found, content_length, error })),
+            note: 'Content omitted due to size constraints',
+          }, null, 2);
+        }
       }
     }
 

diff --git a/src/tools/crawl.ts b/src/tools/crawl.ts
@@ -29,6 +29,7 @@ import {
   StaticFetchError,
   StaticReason,
 } from '../utils/static-fetch';
+import { buildTextMetrics } from '../core/metrics/token-estimate';
 import { buildUrlScoreOptions, scoreUrl, UrlScoreOptions } from '../core/crawl/url-scorer';
 import { extractMainContent, toMarkdown } from '../core/extract/html-to-markdown';
 import { sanitizeContent } from '../security/content-sanitizer';
@@ -100,6 +101,10 @@ const definition: MCPToolDefinition = {
         description:
           'Fetch engine: "cdp" (default, opens a Chrome tab per page), "static" (Node fetch only, fails closed on insufficient pages), or "auto" (static first, fall back to CDP when static is insufficient).',
       },
+      include_metrics: {
+        type: 'boolean',
+        description: 'When true, include approximate output size/token metrics in the JSON result. Default: false.',
+      },
       strategy: {
         type: 'string',
         enum: ['bfs', 'best_first'],
@@ -239,7 +244,6 @@ async function fetchRobotsTxt(
 // the caller (auto mode) can fall back to CDP.
 // ---------------------------------------------------------------------------
 
-
 function cleanMarkdownFromHtml(
   html: string,
   cleanOpts: { onlyMainContent: boolean; includeLinks: boolean },
@@ -355,7 +359,9 @@ async function fetchPageStatic(
 /** Options for `fetchOnePage`, shared by legacy crawl and host-driven crawl jobs. */
 export interface FetchOnePageOptions {
   outputFormat: string;
+  /** When true (default), strip nav/footer/ads from extracted content. */
   onlyMainContent?: boolean;
+  /** When true, include outgoing links in the result for BFS expansion. */
   includeLinks?: boolean;
 }
 
@@ -614,6 +620,7 @@ const handler: ToolHandler = async (
   const delayMs = args.delay_ms != null ? Number(args.delay_ms) : 1000;
   const concurrency = args.concurrency != null ? Math.max(1, Math.min(10, Number(args.concurrency))) : 3;
 
+  const includeMetrics = args.include_metrics === true;
   const engineArg = args.engine as string | undefined;
   let engine: EngineMode = 'cdp';
   if (engineArg === 'static' || engineArg === 'auto' || engineArg === 'cdp') {
@@ -961,10 +968,26 @@ const handler: ToolHandler = async (
       ...(adaptiveDispatcher ? { dispatcher: adaptiveDispatcher.stats() } : {}),
     };
 
-    const output = { summary, pages };
+    const buildOutput = (outputPages: CrawledPage[]) => includeMetrics // Assembles the result object for the given pages; metrics are attached only when includeMetrics is true.
+      ? {
+          summary: {
+            ...summary,
+            metrics: { // Aggregates computed over the pages passed in, not over the serialized JSON.
+              returned_chars: outputPages.reduce((sum, p) => sum + p.content.length, 0),
+              estimated_tokens: outputPages.reduce((sum, p) => sum + buildTextMetrics(p.content).estimated_tokens, 0),
+              truncated_pages: outputPages.filter((p) => p.content.includes('...[truncated]')).length, // Pages whose content contains the truncation marker.
+              mode: `crawl:${outputFormat}`,
+            },
+          },
+          pages: outputPages.map((p) => ({
+            ...p,
+            metrics: buildTextMetrics(p.content, { mode: outputFormat }), // Per-page metrics derived from that page's content.
+          })),
+        }
+      : { summary, pages: outputPages }; // Metrics disabled: plain summary plus pages.
 
     // Ensure output fits within limits
-    let outputJson = JSON.stringify(output, null, 2);
+    let outputJson = JSON.stringify(buildOutput(pages), null, 2);
     if (outputJson.length > MAX_OUTPUT_CHARS) {
       // Truncate page contents progressively to fit
       const truncatedPages = pages.map((p) => ({
@@ -973,7 +996,7 @@ const handler: ToolHandler = async (
           ? p.content.slice(0, 2000) + '...[truncated]'
           : p.content,
       }));
-      outputJson = JSON.stringify({ summary, pages: truncatedPages }, null, 2);
+      outputJson = JSON.stringify(buildOutput(truncatedPages), null, 2);
 
       // If still too large, remove content entirely
       if (outputJson.length > MAX_OUTPUT_CHARS) {
@@ -985,7 +1008,37 @@ const handler: ToolHandler = async (
           content_length: p.content.length,
           error: p.error,
         }));
-        outputJson = JSON.stringify({ summary, pages: minimalPages, note: 'Content omitted due to size constraints' }, null, 2);
+        const minimalOutput = includeMetrics
+          ? {
+              summary: {
+                ...summary,
+                metrics: {
+                  returned_chars: 0,
+                  estimated_tokens: 0,
+                  truncated_pages: minimalPages.length,
+                  mode: `crawl:${outputFormat}`,
+                },
+              },
+              pages: minimalPages.map((p) => ({
+                ...p,
+                metrics: buildTextMetrics('', { mode: outputFormat, truncated: true }),
+              })),
+              note: 'Content omitted due to size constraints',
+            }
+          : { summary, pages: minimalPages, note: 'Content omitted due to size constraints' };
+        outputJson = JSON.stringify(minimalOutput, null, 2);
+        if (outputJson.length > MAX_OUTPUT_CHARS) {
+          outputJson = JSON.stringify({
+            summary: includeMetrics
+              ? {
+                  ...summary,
+                  metrics: { returned_chars: 0, estimated_tokens: 0, truncated_pages: pages.length, mode: `crawl:${outputFormat}` },
+                }
+              : summary,
+            pages: minimalPages.map(({ url, title, depth, links_found, content_length, error }) => ({ url, title, depth, links_found, content_length, error })),
+            note: 'Content omitted due to size constraints',
+          }, null, 2);
+        }
       }
     }
 

diff --git a/src/tools/inspect.ts b/src/tools/inspect.ts
@@ -14,6 +14,7 @@ import { TOOL_ANNOTATIONS } from '../types/tool-annotations';
 import { getSessionManager } from '../session-manager';
 import { withTimeout } from '../utils/with-timeout';
 import { getAllShadowRoots, querySelectorInShadowRoots } from '../utils/shadow-dom';
+import { appendMetricsFooter, buildTextMetrics } from '../core/metrics/token-estimate';
 import { prependHeaderText } from './_shared/state-header';
 import {
   formatNodeRefToken,
@@ -40,6 +41,10 @@ const definition: MCPToolDefinition = {
         enum: ['interactive', 'all', 'visible'],
         description: 'Element scope. Default: visible',
       },
+      include_metrics: {
+        type: 'boolean',
+        description: 'When true, append approximate returned size/token metrics to text output. Default: false.',
+      },
     },
     required: ['tabId', 'query'],
   },
@@ -108,6 +113,7 @@ const handler: ToolHandler = async (
   const tabId = args.tabId as string;
   const query = args.query as string;
   const scope = (args.scope as string) || 'visible';
+  const includeMetrics = args.include_metrics === true;
 
   const sessionManager = getSessionManager();
 
@@ -578,10 +584,15 @@ const handler: ToolHandler = async (
 
     // Footer with page context (always included)
     lines.push(`[Page] ${inspectResult.url} | "${inspectResult.title}"`);
-
     const inspectPayload = lines.join('\n');
+    const headeredText = prependHeaderText({ url: inspectResult.url, title: inspectResult.title, mode: 'inspect', capturedAt: Date.now(), tabId }, inspectPayload);
     return {
-      content: [{ type: 'text', text: prependHeaderText({ url: inspectResult.url, title: inspectResult.title, mode: 'inspect', capturedAt: Date.now(), tabId }, inspectPayload) }],
+      content: [{
+        type: 'text',
+        text: includeMetrics
+          ? appendMetricsFooter(headeredText, buildTextMetrics(headeredText, { mode: `inspect:${scope}` }))
+          : headeredText,
+      }],
     };
   } catch (error) {
     return {