diff --git a/src/dom/dom-serializer.ts b/src/dom/dom-serializer.ts index d93fc444f..1c9034abe 100644 --- a/src/dom/dom-serializer.ts +++ b/src/dom/dom-serializer.ts @@ -18,6 +18,7 @@ export interface DOMSerializerOptions { // light (default): sibling dedup threshold=4, container collapse enabled // aggressive: sibling dedup threshold=3 includeUserAgentShadowDOM?: boolean; // default: false + planningProfile?: 'default' | 'stable'; } export interface PageStats { @@ -137,6 +138,33 @@ function escapeAttributeValue(value: string): string { }); } +const ID_REFERENCE_ATTRS = new Set([ + 'for', + 'aria-labelledby', + 'aria-describedby', + 'aria-activedescendant', + 'aria-controls', + 'aria-owns', + 'aria-flowto', + 'aria-details', +]); + +function collectReferencedIds(node: DOMNode, referencedIds: Set): void { + if (node.nodeType === NODE_TYPE_ELEMENT) { + const attrMap = parseAttributes(node.attributes); + for (const attr of ID_REFERENCE_ATTRS) { + const value = attrMap.get(attr); + if (!value) continue; + for (const id of value.split(/\s+/).filter(Boolean)) { + referencedIds.add(id); + } + } + } + + for (const child of node.children || []) collectReferencedIds(child, referencedIds); + if (node.contentDocument) collectReferencedIds(node.contentDocument, referencedIds); + for (const shadowRoot of node.shadowRoots || []) collectReferencedIds(shadowRoot, referencedIds); +} /** * Check if a node is interactive */ @@ -188,6 +216,54 @@ function getDirectTextContent(node: DOMNode): string { /** * Format a single element node as a line */ +function isVolatileStableAttr(name: string, value: string): boolean { + if (name === 'id') { + const hasRandomKeyword = /(?:^|[-_])(uuid|random|nonce|session|generated|ember|react-aria)[-_]?[a-z0-9]*$/i.test(value); + const hasUuidShape = /[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/i.test(value); + const longHex = value.match(/[0-9a-f]{16,}/i)?.[0]; + const hasMixedLongHex = !!longHex && /[a-f]/i.test(longHex) && /\d/.test(longHex); + return hasRandomKeyword || hasUuidShape || hasMixedLongHex; + } + if (name === 'class') { + return false; + } + return false; +} + +function hasMeaningfulStableDescendant(node: DOMNode): boolean { + for (const child of node.children || []) { + if (child.nodeType !== NODE_TYPE_ELEMENT) continue; + const childTag = child.localName || child.nodeName.toLowerCase(); + const childAttrs = parseAttributes(child.attributes); + if (!isDecorativeMedia(childTag, childAttrs, isInteractive(childTag, childAttrs))) return true; + if (hasMeaningfulStableDescendant(child)) return true; + } + return false; +} + +function isDecorativeMedia(tagName: string, attrMap: Map, interactive: boolean): boolean { + if (interactive) return false; + if (!['img', 'picture', 'source', 'video', 'canvas'].includes(tagName)) return false; + if ( + attrMap.has('alt') || + attrMap.has('title') || + attrMap.has('aria-label') || + attrMap.has('role') || + attrMap.has('data-testid') || + attrMap.has('controls') || + attrMap.has('tabindex') + ) return false; + return true; +} + +function isDecorativeMediaNode(node: DOMNode): boolean { + if (node.nodeType !== NODE_TYPE_ELEMENT) return false; + const tagName = node.localName || node.nodeName.toLowerCase(); + const attrMap = parseAttributes(node.attributes); + return isDecorativeMedia(tagName, attrMap, isInteractive(tagName, attrMap)) + && !hasMeaningfulStableDescendant(node); +} + function formatElement( node: DOMNode, attrMap: Map, @@ -195,13 +271,20 @@ function formatElement( textContent: string, interactive: boolean, hints?: string, + planningProfile: 'default' | 'stable' = 'default', + referencedIds: Set = new Set(), ): string { const tagName = node.localName || node.nodeName.toLowerCase(); // Build attribute string with only kept attrs const attrParts: string[] = []; for (const [k, v] of attrMap) { - if (KEEP_ATTRS.has(k)) { + if (KEEP_ATTRS.has(k) || (planningProfile === 'stable' && k === 'controls')) { + if ( + planningProfile === 'stable' + && isVolatileStableAttr(k, v) + && !(k === 'id' && referencedIds.has(v)) + ) continue; attrParts.push(`${k}="${escapeAttributeValue(v)}"`); } } @@ -323,6 +406,8 @@ interface SerializeContext { interactiveOnly: boolean; compression: 'none' | 'light' | 'aggressive'; includeUserAgentShadowDOM: boolean; + planningProfile: 'default' | 'stable'; + referencedIds: Set; nodesVisited: number; maxNodes: number; customInteractiveHints: Map; @@ -411,6 +496,23 @@ function serializeNode( const customHints = ctx.customInteractiveHints.get(path); const interactive = isInteractive(tagName, attrMap, customHints); + if (ctx.planningProfile === 'stable' && isDecorativeMedia(tagName, attrMap, interactive)) { + const fallbackText = getDirectTextContent(node); + const indent = ' '.repeat(depth); + if (fallbackText) { + const line = formatElement(node, attrMap, indent, fallbackText, interactive, customHints, ctx.planningProfile, ctx.referencedIds); + if (!appendBoundedLine(ctx, line + '\n')) return; + ctx.emittedBackendNodeIds.add(node.backendNodeId); + } + // Omit decorative media wrappers without fallback text, but still inspect + // descendants so meaningful labels inside survive. + for (const child of node.children || []) { + serializeNode(child, depth + 1, ctx); + if (ctx.truncated) return; + } + return; + } + const indent = ' '.repeat(depth); // Container chain collapse (only in non-'none' compression mode, non-interactive containers) @@ -429,7 +531,7 @@ function serializeNode( const leafHints = ctx.customInteractiveHints.get(leafPath); const leafInteractive = isInteractive(leafTag, leafAttrMap, leafHints); const leafText = getDirectTextContent(leaf); - const leafLine = formatElement(leaf, leafAttrMap, '', leafText, leafInteractive, leafHints); + const leafLine = formatElement(leaf, leafAttrMap, '', leafText, leafInteractive, leafHints, ctx.planningProfile, ctx.referencedIds); const fullLine = `${indent}${chainPrefix}${leafLine}\n`; if (ctx.totalChars + fullLine.length > ctx.maxOutputChars) { @@ -459,7 +561,7 @@ function serializeNode( if (!ctx.interactiveOnly || interactive) { const textContent = getDirectTextContent(node); - const line = formatElement(node, attrMap, indent, textContent, interactive, customHints); + const line = formatElement(node, attrMap, indent, textContent, interactive, customHints, ctx.planningProfile, ctx.referencedIds); const lineWithNewline = line + '\n'; if (ctx.totalChars + lineWithNewline.length > ctx.maxOutputChars) { @@ -530,6 +632,13 @@ function serializeNode( for (const group of groups) { if (ctx.truncated) return; + if (ctx.planningProfile === 'stable' && group.nodes.every(isDecorativeMediaNode)) { + // A purely decorative media run contributes no planning signal. Skip it + // as a group instead of visiting every omitted leaf and exhausting the + // serializer node budget on ad/image-heavy pages. + continue; + } + // Skip dedup for groups containing interactive elements to avoid // hiding clickable buttons/links/inputs from the LLM const groupHasInteractive = group.nodes.some(n => containsInteractive(n, childPaths.get(n) ?? path, ctx)); @@ -715,6 +824,7 @@ export async function serializeDOM( const interactiveOnly = (options?.interactiveOnly ?? false) || options?.filter === 'interactive'; const compression = options?.compression ?? 'light'; // default to 'light' const includeUserAgentShadowDOM = options?.includeUserAgentShadowDOM ?? false; + const planningProfile = options?.planningProfile ?? 'default'; // Get page stats via page.evaluate const pageStats = await withTimeout( @@ -765,6 +875,11 @@ export async function serializeDOM( { depth: documentDepth, pierce: true }, ); + const referencedIds = new Set(); + if (planningProfile === 'stable') { + collectReferencedIds(root, referencedIds); + } + const ctx: SerializeContext = { lines: [], totalChars: 0, @@ -775,6 +890,8 @@ export async function serializeDOM( interactiveOnly, compression, includeUserAgentShadowDOM, + planningProfile, + referencedIds, nodesVisited: 0, maxNodes: DEFAULT_MAX_SERIALIZER_NODES, customInteractiveHints, @@ -787,6 +904,10 @@ export async function serializeDOM( appendBoundedLine(ctx, statsLine); } + if (includePageStats && planningProfile === 'stable' && !ctx.truncated) { + appendBoundedLine(ctx, '[planning_profile] stable\n\n'); + } + // Serialize from root if (!ctx.truncated) { serializeNode(root, 0, ctx); diff --git a/src/tools/read-page.ts b/src/tools/read-page.ts index 35cd097a9..2b9dee555 100644 --- a/src/tools/read-page.ts +++ b/src/tools/read-page.ts @@ -129,6 +129,11 @@ const definition: MCPToolDefinition = { enum: ['none', 'delta'], description: 'Compression mode. "delta" returns only changes since last read.', }, + planningProfile: { + type: 'string', + enum: ['default', 'stable'], + description: 'DOM mode only: stable omits decorative/noisy serialization details without mutating the live page. Default: default.', + }, fallback: { type: 'string', enum: ['none', 'dom'], @@ -680,10 +685,12 @@ const handler: ToolHandler = async ( try { const refId = args.ref_id as string | undefined; const depth = args.depth as number | undefined; + const planningProfile = (args.planningProfile as 'default' | 'stable' | undefined) ?? 'default'; const result = await measure('domGetDocumentMs', () => serializeDOM(page, cdpClient, { maxDepth: depth ?? -1, filter: filter, interactiveOnly: filter === 'interactive', + planningProfile, })); diagnostics.formatMs = diagnostics.domGetDocumentMs; diff --git a/tests/cli/admin-keys.test.ts b/tests/cli/admin-keys.test.ts index bdfaf0e03..4a5fdd25d 100644 --- a/tests/cli/admin-keys.test.ts +++ b/tests/cli/admin-keys.test.ts @@ -151,6 +151,7 @@ describe('admin keys CLI', () => { const stdoutTokens = stdout.match(/oc_live_acme_[A-Za-z0-9]+/g) ?? []; expect(stdoutTokens).toHaveLength(1); const plaintext = stdoutTokens[0]; + expect(plaintext).toMatch(/^oc_live_acme_[A-Za-z0-9]+$/); // Warning routed to stderr. expect(stderr).toContain('SAVE THIS KEY NOW'); diff --git a/tests/dom/dom-planning-profile.test.ts b/tests/dom/dom-planning-profile.test.ts new file mode 100644 index 000000000..8cc1e55ce --- /dev/null +++ b/tests/dom/dom-planning-profile.test.ts @@ -0,0 +1,171 @@ +/// + +import { serializeDOM } from '../../src/dom/dom-serializer'; + +function page() { + return { + evaluate: jest.fn().mockResolvedValue({ + url: 'https://example.com/noisy', + title: 'Noisy fixture', + scrollX: 0, + scrollY: 0, + scrollWidth: 1200, + scrollHeight: 1600, + viewportWidth: 1200, + viewportHeight: 800, + }), + }; +} + +function cdp(root: Record) { + return { + send: jest.fn().mockResolvedValue({ root }), + }; +} + +function el(nodeId: number, tag: string, attrs: string[] = [], children: unknown[] = []) { + return { nodeId, backendNodeId: nodeId + 100, nodeType: 1, nodeName: tag.toUpperCase(), localName: tag, attributes: attrs, children }; +} + +function txt(nodeId: number, value: string) { + return { nodeId, backendNodeId: nodeId + 100, nodeType: 3, nodeName: '#text', localName: '', nodeValue: value }; +} + +const noisyDoc = { + nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '', + children: [el(2, 'html', [], [el(3, 'body', [], [ + ...Array.from({ length: 20 }, (_, i) => el(10 + i, 'img', ['src', `/ad-${i}.png`, 'class', 'decorative-ad-slot'])), + el(40, 'a', ['href', '/promo'], [el(41, 'img', ['alt', 'Promo image', 'src', '/promo.png'])]), + el(50, 'input', ['type', 'email', 'placeholder', 'Email', 'id', 'email-field']), + el(51, 'button', ['id', 'save'], [txt(52, 'Save')]), + el(60, 'iframe', ['src', '/frame.html', 'title', 'Frame'], []), + ])])], +}; + +describe('DOM serializer planningProfile=stable', () => { + test('omits decorative media while preserving actionable elements', async () => { + const defaultResult = await serializeDOM(page() as never, cdp(noisyDoc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'default', + }); + const stableResult = await serializeDOM(page() as never, cdp(noisyDoc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'stable', + }); + + expect(defaultResult.content).toContain(' { + const doc = { + nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '', + children: [el(2, 'html', [], [el(3, 'body', [], [ + el(10, 'a', ['href', '/hero'], [ + el(11, 'picture', [], [ + el(12, 'source', ['src', '/hero.webp']), + el(13, 'img', ['src', '/hero.png', 'alt', 'Hero product']), + ]), + ]), + el(20, 'video', ['src', '/demo.mp4', 'controls', ''], []), + ])])], + }; + + const result = await serializeDOM(page() as never, cdp(doc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'stable', + }); + + expect(result.content).toContain(' { + const doc = { + nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '', + children: [el(2, 'html', [], [el(3, 'body', [], [ + el(10, 'label', ['for', 'field-123456789abc'], [txt(11, 'Email')]), + el(12, 'input', ['id', 'field-123456789abc', 'type', 'email']), + el(13, 'input', ['id', 'generated-abcdef1234567890', 'type', 'text']), + ])])], + }; + + const result = await serializeDOM(page() as never, cdp(doc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'stable', + }); + + expect(result.content).toContain('for="field-123456789abc"'); + expect(result.content).toContain('id="field-123456789abc"'); + expect(result.content).not.toContain('id="generated-abcdef1234567890"'); + }); + + test('suppresses decorative media group summaries in stable output', async () => { + const doc = { + nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '', + children: [el(2, 'html', [], [el(3, 'body', [], [ + ...Array.from({ length: 5 }, (_, i) => el(10 + i, 'img', ['src', `/noise-${i}.png`])), + ])])], + }; + + const result = await serializeDOM(page() as never, cdp(doc) as never, { + includePageStats: false, + compression: 'light', + planningProfile: 'stable', + }); + + expect(result.content).not.toContain('img ×5'); + expect(result.content).not.toContain(' { + const result = await serializeDOM(page() as never, cdp(noisyDoc) as never, { + planningProfile: 'stable', + }); + + expect(result.content).toContain('[planning_profile] stable'); + }); + + test('preserves volatile IDs referenced by ARIA IDREF attrs in stable output', async () => { + const doc = { + nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '', + children: [el(2, 'html', [], [el(3, 'body', [], [ + // combobox with aria-activedescendant pointing to a react-aria volatile id + el(10, 'div', ['role', 'combobox', 'aria-activedescendant', 'react-aria-abc123def456'], []), + el(11, 'div', ['id', 'react-aria-abc123def456', 'role', 'option'], [txt(12, 'Option A')]), + // unreferenced generated id should be stripped + el(13, 'div', ['id', 'react-aria-xyz999888777'], [txt(14, 'Noise')]), + ])])], + }; + + const result = await serializeDOM(page() as never, cdp(doc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'stable', + }); + + // referenced id must survive volatile-id pruning + expect(result.content).toContain('id="react-aria-abc123def456"'); + // unreferenced generated id must be stripped + expect(result.content).not.toContain('id="react-aria-xyz999888777"'); + // default mode must strip nothing + const defaultResult = await serializeDOM(page() as never, cdp(doc) as never, { + includePageStats: false, + compression: 'none', + planningProfile: 'default', + }); + expect(defaultResult.content).toContain('id="react-aria-abc123def456"'); + expect(defaultResult.content).toContain('id="react-aria-xyz999888777"'); + }); +}); diff --git a/tests/integration/health-endpoint-gating.test.ts b/tests/integration/health-endpoint-gating.test.ts index db8a7731a..8c3cc8d42 100644 --- a/tests/integration/health-endpoint-gating.test.ts +++ b/tests/integration/health-endpoint-gating.test.ts @@ -269,12 +269,9 @@ describeFn('health endpoint gating (issue #648)', () => { const shutdownTimeoutMs = 30_000; const exit = await waitForExit(child, shutdownTimeoutMs); expect(exit.timedOut).toBe(false); - if (process.platform === 'win32') { - // Windows may report a clean SIGTERM as code=null/signal=SIGTERM. - expect(exit.code === 0 || exit.signal === 'SIGTERM').toBe(true); - } else { - expect(exit.code).toBe(0); - } + // Node may report a clean SIGTERM shutdown as either code=0 or + // code=null/signal=SIGTERM depending on platform and timing. + expect(exit.code === 0 || exit.signal === 'SIGTERM').toBe(true); expect(stderr).not.toMatch(/TypeError/); expect(stderr).not.toMatch(/Cannot read properties of null/); expect(stderr).not.toMatch(/UnhandledPromiseRejection/);