Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions src/tools/_shared/state-header.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
* State Header — unified page-state envelope for tool responses.
*
* Prepends a 4-line header to text-mode tool responses so agents can
* determine which page a snapshot came from without parsing the payload.
*
* Opt-out: set OPENCHROME_STATE_HEADER=off (case-insensitive) to restore
* v1.11.0 byte-identical output.
*/

/**
 * Page-state metadata attached to a tool response so a consumer can tell
 * which page a snapshot came from.
 */
export interface PageStateHeader {
  /** URL of the page the response describes. */
  url: string;

  /** Title of the page the response describes. */
  title: string;

  /**
   * Kind of snapshot the header accompanies (for example, the inspect tool
   * reports `inspect` and page_content reports `html`).
   */
  mode:
    | 'ax'
    | 'dom'
    | 'css'
    | 'html'
    | 'inspect'
    | 'validate';

  /** Unix time in milliseconds: server wall-clock at response assembly. */
  capturedAt: number;

  /** Identifier of the tab the response was produced for. */
  tabId: string;
}

/**
 * Reports whether tool responses should carry the page-state header.
 *
 * The header is on by default. It is switched off only when the
 * OPENCHROME_STATE_HEADER environment variable equals `off`, compared
 * case-insensitively; an unset variable or any other value (including the
 * empty string) leaves it enabled.
 *
 * @returns `true` when the header should be emitted, `false` when opted out.
 */
export function isStateHeaderEnabled(): boolean {
  const setting = process.env.OPENCHROME_STATE_HEADER;
  if (setting === undefined) {
    return true;
  }
  return setting.toLowerCase() !== 'off';
}

/**
 * Formats the fixed 4-line text header (URL, title, mode, capture time).
 *
 * The returned string ends with a trailing newline, so
 * `formatHeaderText(h) + existingPayload` is clean without extra newlines.
 * Callers that want a blank separator line should append '\n' before the
 * payload. `h.tabId` is not part of the text header.
 *
 * @param h - Page state to render. `url` and `title` are treated as untrusted
 *   page-controlled text.
 * @returns Exactly four newline-terminated lines.
 */
export function formatHeaderText(h: PageStateHeader): string {
  // Every control character (C0, DEL, C1 — which includes NEL U+0085) plus the
  // Unicode line/paragraph separators is flattened to a space. Replacing only
  // CR/LF would still let a crafted title or URL start a new "line" for any
  // consumer that honours the other terminators, and spoof later fields.
  const unsafeChars = /[\u0000-\u001f\u007f-\u009f\u2028\u2029]/g;
  const toSingleLine = (value: string): string => value.replace(unsafeChars, ' ');

  // Date#toISOString throws RangeError for NaN / out-of-range timestamps.
  // The header is metadata, so a bad timestamp must not discard the payload.
  const captured = new Date(h.capturedAt);
  const capturedAtIso = Number.isNaN(captured.getTime()) ? 'unknown' : captured.toISOString();

  return (
    `- Page URL: ${toSingleLine(h.url)}\n` +
    `- Page Title: ${toSingleLine(h.title)}\n` +
    `- Page Mode: ${h.mode}\n` +
    `- Captured At: ${capturedAtIso}\n`
  );
}

/**
 * Puts the state header, followed by one blank line, in front of a text payload.
 *
 * @param h - Page state rendered by `formatHeaderText`.
 * @param payload - Existing text-mode response body.
 * @returns Header + '\n' + payload when the header is enabled; otherwise the
 *   payload itself, untouched.
 */
export function prependHeaderText(h: PageStateHeader, payload: string): string {
  return isStateHeaderEnabled() ? `${formatHeaderText(h)}\n${payload}` : payload;
}

/**
 * Adds the state header to a JSON-mode response object under the `state` key.
 *
 * When the header is enabled a new object is returned with `state` listed
 * first, followed by the own enumerable properties of `obj`. Because `obj` is
 * spread after `state`, a `state` property already present on `obj` takes
 * precedence over the header. When the header is disabled, `obj` itself is
 * returned (same reference).
 *
 * @param h - Page state to attach.
 * @param obj - Response body to enrich.
 */
export function mergeHeaderJson<T extends object>(h: PageStateHeader, obj: T): T & { state: PageStateHeader } | T {
  if (isStateHeaderEnabled()) {
    return { state: h, ...obj };
  }
  return obj;
}
4 changes: 3 additions & 1 deletion src/tools/inspect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { TOOL_ANNOTATIONS } from '../types/tool-annotations';
import { getSessionManager } from '../session-manager';
import { withTimeout } from '../utils/with-timeout';
import { getAllShadowRoots, querySelectorInShadowRoots } from '../utils/shadow-dom';
import { prependHeaderText } from './_shared/state-header';
import {
formatNodeRefToken,
getCurrentLoaderId,
Expand Down Expand Up @@ -578,8 +579,9 @@ const handler: ToolHandler = async (
// Footer with page context (always included)
lines.push(`[Page] ${inspectResult.url} | "${inspectResult.title}"`);

const inspectPayload = lines.join('\n');
return {
content: [{ type: 'text', text: lines.join('\n') }],
content: [{ type: 'text', text: prependHeaderText({ url: inspectResult.url, title: inspectResult.title, mode: 'inspect', capturedAt: Date.now(), tabId }, inspectPayload) }],
};
} catch (error) {
return {
Expand Down
299 changes: 153 additions & 146 deletions src/tools/page-content.ts
Original file line number Diff line number Diff line change
@@ -1,148 +1,155 @@
/**
* Page Content Tool - Get HTML content from page
*/
import { MCPServer } from '../mcp-server';
import { MCPToolDefinition, MCPResult, ToolHandler } from '../types/mcp';
/**
* Page Content Tool - Get HTML content from page
*/

import { MCPServer } from '../mcp-server';
import { MCPToolDefinition, MCPResult, ToolHandler } from '../types/mcp';
import { TOOL_ANNOTATIONS } from '../types/tool-annotations';
import { getSessionManager } from '../session-manager';
import { MAX_OUTPUT_CHARS, DEFAULT_NAVIGATION_TIMEOUT_MS } from '../config/defaults';
import { withTimeout } from '../utils/with-timeout';

const definition: MCPToolDefinition = {
name: 'page_content',
description: 'Get HTML content from page or element.',
inputSchema: {
type: 'object',
properties: {
tabId: {
type: 'string',
description: 'Tab ID to get content from',
},
selector: {
type: 'string',
description: 'CSS selector. Omit for full page',
},
outerHTML: {
type: 'boolean',
description: 'Return outerHTML vs innerHTML. Default: true',
},
},
required: ['tabId'],
},
import { getSessionManager } from '../session-manager';
import { MAX_OUTPUT_CHARS, DEFAULT_NAVIGATION_TIMEOUT_MS } from '../config/defaults';
import { withTimeout } from '../utils/with-timeout';
import { mergeHeaderJson, isStateHeaderEnabled } from './_shared/state-header';

const definition: MCPToolDefinition = {
name: 'page_content',
description: 'Get HTML content from page or element.',
inputSchema: {
type: 'object',
properties: {
tabId: {
type: 'string',
description: 'Tab ID to get content from',
},
selector: {
type: 'string',
description: 'CSS selector. Omit for full page',
},
outerHTML: {
type: 'boolean',
description: 'Return outerHTML vs innerHTML. Default: true',
},
},
required: ['tabId'],
},
annotations: TOOL_ANNOTATIONS.page_content,
};

/**
 * page_content handler (pre-change version shown on the removed side of the diff).
 *
 * Returns the HTML of the whole page, or of the first element matching
 * `selector`, as a JSON string inside a single text content item. Output
 * longer than MAX_OUTPUT_CHARS is cut and a "[Truncated: …]" marker appended.
 * Failures are reported as `isError: true` results rather than thrown.
 */
const handler: ToolHandler = async (
sessionId: string,
args: Record<string, unknown>
): Promise<MCPResult> => {
const tabId = args.tabId as string;
const selector = args.selector as string | undefined;
// outerHTML defaults to true when the argument is null/undefined.
const outerHTML = (args.outerHTML as boolean) ?? true;

const sessionManager = getSessionManager();

if (!tabId) {
return {
content: [{ type: 'text', text: 'Error: tabId is required' }],
isError: true,
};
}

try {
const page = await sessionManager.getPage(sessionId, tabId, undefined, 'page_content');
if (!page) {
return {
content: [{ type: 'text', text: `Error: Tab ${tabId} not found` }],
isError: true,
};
}

if (selector) {
// Get content from specific element
const element = await page.$(selector);

// No match: still answer with a JSON body, but flag the result as an error.
if (!element) {
return {
content: [
{
type: 'text',
text: JSON.stringify({
action: 'page_content',
selector,
content: null,
message: `No element found matching "${selector}"`,
}),
},
],
isError: true,
};
}

// Read the element's markup in the page, bounded by a 15000 ms timeout.
let html = await withTimeout(page.evaluate(
(el: Element, getOuter: boolean) => {
return getOuter ? el.outerHTML : el.innerHTML;
},
element,
outerHTML
), 15000, 'page_content');

// Remember the untruncated length; it is reported as contentLength below.
const originalLength = html.length;
if (html.length > MAX_OUTPUT_CHARS) {
html = html.substring(0, MAX_OUTPUT_CHARS) + `\n\n[Truncated: ${originalLength} chars total, showing first ${MAX_OUTPUT_CHARS}]`;
}

return {
content: [
{
type: 'text',
text: JSON.stringify({
action: 'page_content',
selector,
outerHTML,
contentLength: originalLength,
content: html,
}),
},
],
};
} else {
// Get full page content
let html = await withTimeout(page.content(), DEFAULT_NAVIGATION_TIMEOUT_MS, 'page.content()');

// Same truncation rule as the element branch.
const originalLength = html.length;
if (html.length > MAX_OUTPUT_CHARS) {
html = html.substring(0, MAX_OUTPUT_CHARS) + `\n\n[Truncated: ${originalLength} chars total, showing first ${MAX_OUTPUT_CHARS}]`;
}

return {
content: [
{
type: 'text',
text: JSON.stringify({
action: 'page_content',
selector: null,
contentLength: originalLength,
content: html,
}),
},
],
};
}
} catch (error) {
// Anything thrown above (page lookup, evaluation, timeout) becomes a plain-text error result.
return {
content: [
{
type: 'text',
text: `Page content error: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
};

/**
 * Registers the tool on `server` under the name 'page_content', passing the
 * module's `handler` and `definition`.
 */
export function registerPageContentTool(server: MCPServer): void {
server.registerTool('page_content', handler, definition);
}
};
Comment on lines +32 to +35
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Restore required annotations on page_content definition

The page_content tool definition now omits the annotations field even though MCPToolDefinition treats it as required, so tools/list can emit a schema-incomplete entry for this tool and downstream clients that rely on annotation hints (or strict schema validation) may reject or mis-handle it. This also violates the repo’s own contract check in tests/unit/tool-annotations.test.ts that every tool declares annotations: TOOL_ANNOTATIONS.<tool>, making this a regression introduced by this change.

Useful? React with 👍 / 👎.


/**
 * page_content handler.
 *
 * Returns the HTML of the whole page, or of the first element matching
 * `selector`, as a JSON string inside a single text content item. Output
 * longer than MAX_OUTPUT_CHARS is cut and a "[Truncated: …]" marker appended.
 * When the state header is enabled, the JSON body additionally carries a
 * `state` object (url, title, mode 'html', capturedAt, tabId).
 *
 * Failures are reported as `isError: true` results rather than thrown.
 * Collecting state metadata is best-effort: it can never turn a snapshot that
 * was already captured into an error.
 *
 * NOTE(review): unlike the text-mode tools, this response does not get the
 * 4-line text preamble — state travels only inside the JSON body. Confirm that
 * clients parsing the cross-tool text header are expected to special-case
 * page_content.
 */
const handler: ToolHandler = async (
  sessionId: string,
  args: Record<string, unknown>
): Promise<MCPResult> => {
  const tabId = args.tabId as string;
  const selector = args.selector as string | undefined;
  // outerHTML defaults to true when the argument is null/undefined.
  const outerHTML = (args.outerHTML as boolean) ?? true;

  const sessionManager = getSessionManager();

  if (!tabId) {
    return {
      content: [{ type: 'text', text: 'Error: tabId is required' }],
      isError: true,
    };
  }

  try {
    const page = await sessionManager.getPage(sessionId, tabId, undefined, 'page_content');
    if (!page) {
      return {
        content: [{ type: 'text', text: `Error: Tab ${tabId} not found` }],
        isError: true,
      };
    }

    // Derived from the helper's signature so no extra import is needed here.
    type StateHeader = Parameters<typeof mergeHeaderJson>[0];

    // Checked up front so that, when the header is off, no extra page calls
    // are made and the output stays identical to the header-less format.
    const headerEnabled = isStateHeaderEnabled();

    const makeState = (url: string, title: string): StateHeader => ({
      url,
      title,
      mode: 'html',
      capturedAt: Date.now(),
      tabId,
    });

    // Best-effort probe: a navigation or target close while reading the
    // title/URL must not discard content that has already been captured, so
    // any failure falls back to whatever was read so far (empty strings).
    const probeState = async (): Promise<StateHeader> => {
      let url = '';
      let title = '';
      try {
        url = page.url();
        title = await page.title();
      } catch {
        // Metadata only — keep the partial values.
      }
      return makeState(url, title);
    };

    // Applies the shared output cap; the marker reports the untruncated length.
    const clip = (html: string): string =>
      html.length > MAX_OUTPUT_CHARS
        ? html.substring(0, MAX_OUTPUT_CHARS) +
          `\n\n[Truncated: ${html.length} chars total, showing first ${MAX_OUTPUT_CHARS}]`
        : html;

    const toText = (body: object, state: StateHeader | undefined): string =>
      JSON.stringify(state === undefined ? body : mergeHeaderJson(state, body));

    if (selector) {
      // Get content from specific element
      const element = await page.$(selector);

      if (!element) {
        const missingBody = {
          action: 'page_content',
          selector,
          content: null,
          message: `No element found matching "${selector}"`,
        };
        const missingState = headerEnabled ? await probeState() : undefined;
        return {
          content: [{ type: 'text', text: toText(missingBody, missingState) }],
          isError: true,
        };
      }

      // The markup, URL and title are read in one in-page call so the state
      // describes the same document as the HTML, even if the page navigates
      // right afterwards.
      let captured: { html: string; url: string; title: string };
      try {
        captured = await withTimeout(page.evaluate(
          (el: Element, getOuter: boolean) => ({
            html: getOuter ? el.outerHTML : el.innerHTML,
            url: el.ownerDocument.URL,
            title: el.ownerDocument.title,
          }),
          element,
          outerHTML
        ), 15000, 'page_content');
      } finally {
        // Release the element handle so it does not pin the DOM node.
        // Assumes a Puppeteer-style handle exposing dispose(); any failure
        // here (including a missing method) is deliberately ignored.
        try {
          await element.dispose();
        } catch {
          // Best-effort cleanup.
        }
      }

      const elementBody = {
        action: 'page_content',
        selector,
        outerHTML,
        contentLength: captured.html.length,
        content: clip(captured.html),
      };
      const elementState = headerEnabled ? makeState(captured.url, captured.title) : undefined;
      return {
        content: [{ type: 'text', text: toText(elementBody, elementState) }],
      };
    } else {
      // Get full page content
      const rawHtml = await withTimeout(page.content(), DEFAULT_NAVIGATION_TIMEOUT_MS, 'page.content()');

      // NOTE(review): page.content() and the state probe are separate calls,
      // so on a page that redirects between them the state can still describe
      // the next document. The probe is at least non-fatal; making it atomic
      // would require capturing the HTML and the state in a single in-page
      // evaluation instead of page.content().
      const fullPageState = headerEnabled ? await probeState() : undefined;

      const fullPageBody = {
        action: 'page_content',
        selector: null,
        contentLength: rawHtml.length,
        content: clip(rawHtml),
      };
      return {
        content: [{ type: 'text', text: toText(fullPageBody, fullPageState) }],
      };
    }
  } catch (error) {
    // Anything thrown above (page lookup, evaluation, timeout) becomes a
    // plain-text error result.
    return {
      content: [
        {
          type: 'text',
          text: `Page content error: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
};

/**
 * Registers the tool on `server` under the name 'page_content', passing the
 * module's `handler` and `definition`.
 */
export function registerPageContentTool(server: MCPServer): void {
server.registerTool('page_content', handler, definition);
}
Loading
Loading