-
Notifications
You must be signed in to change notification settings - Fork 35
feat(core): unified state header on page-state tool responses (#893) #912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
52ec16a
a663bd6
a4649bd
34f1ae2
86e0731
5ae04a9
9f92b57
0c2ed04
403f4cd
67ec8eb
ab483b2
dd57c85
1c9d2b0
e19b264
d3b06c1
5509eaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| /** | ||
| * State Header — unified page-state envelope for tool responses. | ||
| * | ||
| * Prepends a 4-line header to text-mode tool responses so agents can | ||
| * determine which page a snapshot came from without parsing the payload. | ||
| * | ||
| * Opt-out: set OPENCHROME_STATE_HEADER=off (case-insensitive) to restore | ||
| * v1.11.0 byte-identical output. | ||
| */ | ||
|
|
||
/**
 * Page-state metadata carried alongside a tool response.
 */
export interface PageStateHeader {
  /** Page URL, emitted as the "Page URL" header line. */
  url: string;
  /** Page title, emitted as the "Page Title" header line. */
  title: string;
  /** Which kind of snapshot the response carries. */
  mode:
    | 'ax'
    | 'dom'
    | 'css'
    | 'html'
    | 'inspect'
    | 'validate';
  /** Unix ms — server wall-clock at response assembly. */
  capturedAt: number;
  /** Identifier of the tab the response was produced for. */
  tabId: string;
}
|
|
||
/**
 * Reports whether the state header should be included in tool responses.
 *
 * The header is enabled by default. It is disabled only when the
 * OPENCHROME_STATE_HEADER environment variable equals `off`. The comparison
 * ignores letter case and surrounding whitespace, so `OFF`, ` off ` or a value
 * with a trailing newline (easy to produce from a .env file or shell export)
 * also disable it. Any other value, including the empty string, keeps the
 * header enabled.
 *
 * @returns `true` when responses should carry the state header.
 */
export function isStateHeaderEnabled(): boolean {
  const raw = process.env.OPENCHROME_STATE_HEADER;
  if (raw === undefined) return true;
  // Trim before comparing: stray whitespace must not silently defeat the opt-out.
  return raw.trim().toLowerCase() !== 'off';
}
|
|
||
/**
 * Characters that may terminate or corrupt a header line: every C0 control
 * (including CR, LF, VT, FF and TAB), DEL, NEL (U+0085) and the Unicode
 * line/paragraph separators (U+2028, U+2029).
 */
const HEADER_LINE_BREAKERS = /[\u0000-\u001F\u007F\u0085\u2028\u2029]/g;

/**
 * Formats the 4-line header text.
 *
 * The returned string ends with a trailing newline so that
 * `formatHeaderText(h) + existingPayload` is clean without extra newlines.
 * Callers that want a blank separator line should append '\n' before the payload.
 *
 * `url` and `title` come from the page and are therefore untrusted: each
 * line-breaking or control character in them is replaced with a single space
 * so a crafted value cannot split the fixed 4-line header into extra lines and
 * spoof subsequent fields. `tabId` is not part of the text header.
 *
 * @param h - Page state to render.
 * @returns The header as four `- Label: value` lines, each ending in '\n'.
 * @throws RangeError if `h.capturedAt` is not a valid time value
 *   (raised by `Date.prototype.toISOString`).
 */
export function formatHeaderText(h: PageStateHeader): string {
  const capturedAtIso = new Date(h.capturedAt).toISOString();
  // One space per offending character keeps the previous output for plain
  // CR/LF input while also covering the separators the old pattern missed.
  const safeUrl = h.url.replace(HEADER_LINE_BREAKERS, ' ');
  const safeTitle = h.title.replace(HEADER_LINE_BREAKERS, ' ');
  return (
    `- Page URL: ${safeUrl}\n` +
    `- Page Title: ${safeTitle}\n` +
    `- Page Mode: ${h.mode}\n` +
    `- Captured At: ${capturedAtIso}\n`
  );
}
|
|
||
/**
 * Puts the state header, followed by one blank line, in front of a text payload.
 * When the header is disabled the payload is handed back untouched.
 */
export function prependHeaderText(h: PageStateHeader, payload: string): string {
  return isStateHeaderEnabled() ? `${formatHeaderText(h)}\n${payload}` : payload;
}
|
|
||
/**
 * Adds the state header to a JSON-mode response object under the `state` key.
 * `state` is written first and the object's own properties are spread after it.
 * When the header is disabled the very same object is returned.
 */
export function mergeHeaderJson<T extends object>(h: PageStateHeader, obj: T): T & { state: PageStateHeader } | T {
  if (isStateHeaderEnabled()) {
    return { state: h, ...obj };
  }
  return obj;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,148 +1,155 @@ | ||
| /** | ||
| * Page Content Tool - Get HTML content from page | ||
| */ | ||
| import { MCPServer } from '../mcp-server'; | ||
| import { MCPToolDefinition, MCPResult, ToolHandler } from '../types/mcp'; | ||
| /** | ||
| * Page Content Tool - Get HTML content from page | ||
| */ | ||
|
|
||
| import { MCPServer } from '../mcp-server'; | ||
| import { MCPToolDefinition, MCPResult, ToolHandler } from '../types/mcp'; | ||
| import { TOOL_ANNOTATIONS } from '../types/tool-annotations'; | ||
| import { getSessionManager } from '../session-manager'; | ||
| import { MAX_OUTPUT_CHARS, DEFAULT_NAVIGATION_TIMEOUT_MS } from '../config/defaults'; | ||
| import { withTimeout } from '../utils/with-timeout'; | ||
|
|
||
| const definition: MCPToolDefinition = { | ||
| name: 'page_content', | ||
| description: 'Get HTML content from page or element.', | ||
| inputSchema: { | ||
| type: 'object', | ||
| properties: { | ||
| tabId: { | ||
| type: 'string', | ||
| description: 'Tab ID to get content from', | ||
| }, | ||
| selector: { | ||
| type: 'string', | ||
| description: 'CSS selector. Omit for full page', | ||
| }, | ||
| outerHTML: { | ||
| type: 'boolean', | ||
| description: 'Return outerHTML vs innerHTML. Default: true', | ||
| }, | ||
| }, | ||
| required: ['tabId'], | ||
| }, | ||
| import { getSessionManager } from '../session-manager'; | ||
| import { MAX_OUTPUT_CHARS, DEFAULT_NAVIGATION_TIMEOUT_MS } from '../config/defaults'; | ||
| import { withTimeout } from '../utils/with-timeout'; | ||
| import { mergeHeaderJson, isStateHeaderEnabled } from './_shared/state-header'; | ||
|
|
||
/**
 * Tool descriptor registered for `page_content`: name, description,
 * input schema (only `tabId` is required) and annotations.
 */
const definition: MCPToolDefinition = {
  name: 'page_content',
  description: 'Get HTML content from page or element.',
  inputSchema: {
    type: 'object',
    properties: {
      tabId: { type: 'string', description: 'Tab ID to get content from' },
      selector: { type: 'string', description: 'CSS selector. Omit for full page' },
      outerHTML: { type: 'boolean', description: 'Return outerHTML vs innerHTML. Default: true' },
    },
    required: ['tabId'],
  },
  annotations: TOOL_ANNOTATIONS.page_content,
};
|
|
||
| const handler: ToolHandler = async ( | ||
| sessionId: string, | ||
| args: Record<string, unknown> | ||
| ): Promise<MCPResult> => { | ||
| const tabId = args.tabId as string; | ||
| const selector = args.selector as string | undefined; | ||
| const outerHTML = (args.outerHTML as boolean) ?? true; | ||
|
|
||
| const sessionManager = getSessionManager(); | ||
|
|
||
| if (!tabId) { | ||
| return { | ||
| content: [{ type: 'text', text: 'Error: tabId is required' }], | ||
| isError: true, | ||
| }; | ||
| } | ||
|
|
||
| try { | ||
| const page = await sessionManager.getPage(sessionId, tabId, undefined, 'page_content'); | ||
| if (!page) { | ||
| return { | ||
| content: [{ type: 'text', text: `Error: Tab ${tabId} not found` }], | ||
| isError: true, | ||
| }; | ||
| } | ||
|
|
||
| if (selector) { | ||
| // Get content from specific element | ||
| const element = await page.$(selector); | ||
|
|
||
| if (!element) { | ||
| return { | ||
| content: [ | ||
| { | ||
| type: 'text', | ||
| text: JSON.stringify({ | ||
| action: 'page_content', | ||
| selector, | ||
| content: null, | ||
| message: `No element found matching "${selector}"`, | ||
| }), | ||
| }, | ||
| ], | ||
| isError: true, | ||
| }; | ||
| } | ||
|
|
||
| let html = await withTimeout(page.evaluate( | ||
| (el: Element, getOuter: boolean) => { | ||
| return getOuter ? el.outerHTML : el.innerHTML; | ||
| }, | ||
| element, | ||
| outerHTML | ||
| ), 15000, 'page_content'); | ||
|
|
||
| const originalLength = html.length; | ||
| if (html.length > MAX_OUTPUT_CHARS) { | ||
| html = html.substring(0, MAX_OUTPUT_CHARS) + `\n\n[Truncated: ${originalLength} chars total, showing first ${MAX_OUTPUT_CHARS}]`; | ||
| } | ||
|
|
||
| return { | ||
| content: [ | ||
| { | ||
| type: 'text', | ||
| text: JSON.stringify({ | ||
| action: 'page_content', | ||
| selector, | ||
| outerHTML, | ||
| contentLength: originalLength, | ||
| content: html, | ||
| }), | ||
| }, | ||
| ], | ||
| }; | ||
| } else { | ||
| // Get full page content | ||
| let html = await withTimeout(page.content(), DEFAULT_NAVIGATION_TIMEOUT_MS, 'page.content()'); | ||
|
|
||
| const originalLength = html.length; | ||
| if (html.length > MAX_OUTPUT_CHARS) { | ||
| html = html.substring(0, MAX_OUTPUT_CHARS) + `\n\n[Truncated: ${originalLength} chars total, showing first ${MAX_OUTPUT_CHARS}]`; | ||
| } | ||
|
|
||
| return { | ||
| content: [ | ||
| { | ||
| type: 'text', | ||
| text: JSON.stringify({ | ||
| action: 'page_content', | ||
| selector: null, | ||
| contentLength: originalLength, | ||
| content: html, | ||
| }), | ||
| }, | ||
| ], | ||
| }; | ||
| } | ||
| } catch (error) { | ||
| return { | ||
| content: [ | ||
| { | ||
| type: 'text', | ||
| text: `Page content error: ${error instanceof Error ? error.message : String(error)}`, | ||
| }, | ||
| ], | ||
| isError: true, | ||
| }; | ||
| } | ||
| }; | ||
|
|
||
| export function registerPageContentTool(server: MCPServer): void { | ||
| server.registerTool('page_content', handler, definition); | ||
| } | ||
| }; | ||
|
|
||
/** Upper bound (ms) on the `page.title()` lookup that only feeds the state header. */
const STATE_TITLE_TIMEOUT_MS = 5000;

/**
 * Caps HTML at MAX_OUTPUT_CHARS, appending a note with the original length
 * when anything was cut off. Shorter input is returned unchanged.
 */
function truncateHtml(html: string): string {
  if (html.length <= MAX_OUTPUT_CHARS) return html;
  return (
    html.substring(0, MAX_OUTPUT_CHARS) +
    `\n\n[Truncated: ${html.length} chars total, showing first ${MAX_OUTPUT_CHARS}]`
  );
}

/**
 * Adds the page-state header (`state`) to a JSON response body when the
 * header is enabled; otherwise returns the body untouched without querying
 * the page at all.
 *
 * The title lookup is bounded by a timeout and is best-effort: if it fails or
 * times out, the header is still attached with an empty title, so a metadata
 * problem never replaces the primary result with an error.
 *
 * NOTE(review): url/title are read after the HTML has been captured; a
 * navigation in between can make the header describe a different document
 * than `content`.
 */
async function attachPageState<T extends object>(
  page: { url(): string; title(): Promise<string> },
  tabId: string,
  body: T,
): Promise<object> {
  if (!isStateHeaderEnabled()) return body;

  let title = '';
  try {
    title = await withTimeout(page.title(), STATE_TITLE_TIMEOUT_MS, 'page.title()');
  } catch {
    // Header metadata only — keep the empty title and carry on.
  }

  return mergeHeaderJson(
    { url: page.url(), title, mode: 'html', capturedAt: Date.now(), tabId },
    body,
  );
}

/**
 * Handles a `page_content` call.
 *
 * Reads `tabId` (required), `selector` (optional) and `outerHTML`
 * (optional, defaults to true) from `args`. With a selector it returns the
 * matching element's outer/inner HTML; without one it returns the full page
 * HTML. Content longer than MAX_OUTPUT_CHARS is truncated, while
 * `contentLength` always reports the untruncated length.
 *
 * Every result is a single text item. Successful results and the
 * "no element found" result are JSON, carrying the `state` header when it is
 * enabled. A missing `tabId`, an unknown tab, no matching element and any
 * thrown error are all reported with `isError: true` rather than thrown.
 */
const handler: ToolHandler = async (
  sessionId: string,
  args: Record<string, unknown>
): Promise<MCPResult> => {
  const tabId = args.tabId as string;
  const selector = args.selector as string | undefined;
  const outerHTML = (args.outerHTML as boolean) ?? true;

  const sessionManager = getSessionManager();

  if (!tabId) {
    return {
      content: [{ type: 'text', text: 'Error: tabId is required' }],
      isError: true,
    };
  }

  try {
    const page = await sessionManager.getPage(sessionId, tabId, undefined, 'page_content');
    if (!page) {
      return {
        content: [{ type: 'text', text: `Error: Tab ${tabId} not found` }],
        isError: true,
      };
    }

    if (selector) {
      // Get content from specific element
      const element = await page.$(selector);

      if (!element) {
        const missingBody = {
          action: 'page_content',
          selector,
          content: null,
          message: `No element found matching "${selector}"`,
        };
        const missingWithState = await attachPageState(page, tabId, missingBody);
        return {
          content: [{ type: 'text', text: JSON.stringify(missingWithState) }],
          isError: true,
        };
      }

      const html = await withTimeout(page.evaluate(
        (el: Element, getOuter: boolean) => {
          return getOuter ? el.outerHTML : el.innerHTML;
        },
        element,
        outerHTML
      ), 15000, 'page_content');

      const elementBody = {
        action: 'page_content',
        selector,
        outerHTML,
        contentLength: html.length,
        content: truncateHtml(html),
      };
      const elementWithState = await attachPageState(page, tabId, elementBody);
      return {
        content: [{ type: 'text', text: JSON.stringify(elementWithState) }],
      };
    }

    // Get full page content
    const html = await withTimeout(page.content(), DEFAULT_NAVIGATION_TIMEOUT_MS, 'page.content()');

    const fullPageBody = {
      action: 'page_content',
      selector: null,
      contentLength: html.length,
      content: truncateHtml(html),
    };
    const fullPageWithState = await attachPageState(page, tabId, fullPageBody);
    return {
      content: [{ type: 'text', text: JSON.stringify(fullPageWithState) }],
    };
  } catch (error) {
    return {
      content: [
        {
          type: 'text',
          text: `Page content error: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
};
|
|
||
/**
 * Registers the `page_content` tool — its handler and its definition — on the
 * given MCP server.
 */
export function registerPageContentTool(server: MCPServer): void {
  const toolName = 'page_content';
  server.registerTool(toolName, handler, definition);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `page_content` tool definition now omits the `annotations` field even though `MCPToolDefinition` treats it as required, so `tools/list` can emit a schema-incomplete entry for this tool, and downstream clients that rely on annotation hints (or strict schema validation) may reject or mishandle it. This also violates the repo’s own contract check in `tests/unit/tool-annotations.test.ts` that every tool declares `annotations: TOOL_ANNOTATIONS.<tool>`, making this a regression introduced by this change.

Useful? React with 👍 / 👎.