-
Notifications
You must be signed in to change notification settings - Fork 36
perf(dom): add snapshot-only stable planning profile #1066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7b6289c
f27b038
784fb68
c39a020
d52896b
2fe993f
d8b9f7a
cab5476
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ export interface DOMSerializerOptions { | |
| // light (default): sibling dedup threshold=4, container collapse enabled | ||
| // aggressive: sibling dedup threshold=3 | ||
| includeUserAgentShadowDOM?: boolean; // default: false | ||
| planningProfile?: 'default' | 'stable'; | ||
| } | ||
|
|
||
| export interface PageStats { | ||
|
|
@@ -137,6 +138,33 @@ function escapeAttributeValue(value: string): string { | |
| }); | ||
| } | ||
|
|
||
/**
 * Attribute names whose values are one or more whitespace-separated element
 * ids. collectReferencedIds reads these from every element so that, in the
 * stable planning profile, an id that looks generated is still emitted when
 * another element points at it.
 *
 * Covers the HTML `for` attribute plus the WAI-ARIA ID-reference properties,
 * including `aria-errormessage`.
 */
const ID_REFERENCE_ATTRS = new Set([
  'for',
  'aria-labelledby',
  'aria-describedby',
  'aria-activedescendant',
  'aria-controls',
  'aria-owns',
  'aria-flowto',
  'aria-details',
  'aria-errormessage',
]);
|
|
||
/**
 * Walk a DOM subtree and record every id that some element refers to through
 * one of the ID_REFERENCE_ATTRS attributes.
 *
 * @param node - Root of the subtree to scan.
 * @param referencedIds - Accumulator set; referenced ids are added in place.
 */
function collectReferencedIds(node: DOMNode, referencedIds: Set<string>): void {
  const isElement = node.nodeType === NODE_TYPE_ELEMENT;
  if (isElement) {
    const attributes = parseAttributes(node.attributes);
    ID_REFERENCE_ATTRS.forEach((attrName) => {
      const rawValue = attributes.get(attrName);
      if (!rawValue) return;
      // A single attribute may list several ids separated by whitespace.
      rawValue.split(/\s+/).forEach((token) => {
        if (token) referencedIds.add(token);
      });
    });
  }

  // Recurse into regular children, then a nested content document, then shadow roots.
  const descend = (next: DOMNode): void => collectReferencedIds(next, referencedIds);
  (node.children || []).forEach((child) => descend(child));
  if (node.contentDocument) descend(node.contentDocument);
  (node.shadowRoots || []).forEach((shadowRoot) => descend(shadowRoot));
}
| /** | ||
| * Check if a node is interactive | ||
| */ | ||
|
|
@@ -188,20 +216,75 @@ function getDirectTextContent(node: DOMNode): string { | |
| /** | ||
| * Format a single element node as a line | ||
| */ | ||
/**
 * Decide whether an attribute value looks machine-generated and should be
 * treated as volatile by the stable planning profile.
 *
 * Only `id` values are ever reported as volatile; every other attribute name
 * (including `class`) returns false.
 *
 * An id is volatile when any of the following holds:
 *  - it ends with a generator keyword (uuid, random, nonce, session,
 *    generated, ember, react-aria) at the start or after `-`/`_`, optionally
 *    followed by a separator and an alphanumeric suffix;
 *  - it contains a UUID-shaped substring;
 *  - it contains a run of 16 or more hex characters that mixes letters a-f
 *    with digits.
 *
 * @param name - Attribute name.
 * @param value - Attribute value.
 * @returns true when the value should be treated as volatile.
 */
function isVolatileStableAttr(name: string, value: string): boolean {
  if (name !== 'id') return false;

  const hasRandomKeyword = /(?:^|[-_])(uuid|random|nonce|session|generated|ember|react-aria)[-_]?[a-z0-9]*$/i.test(value);
  if (hasRandomKeyword) return true;

  const hasUuidShape = /[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/i.test(value);
  if (hasUuidShape) return true;

  // Inspect every long hex run, not just the first: an all-digit run early in
  // the id must not hide a mixed (letters and digits) run later on.
  const longHexRuns = value.match(/[0-9a-f]{16,}/gi) ?? [];
  return longHexRuns.some((run) => /[a-f]/i.test(run) && /\d/.test(run));
}
|
|
||
/**
 * Report whether any element beneath `node` (searched through `children`
 * only) is something other than decorative media.
 *
 * Non-element children are ignored. An element child counts as meaningful as
 * soon as isDecorativeMedia rejects it; otherwise its own children are
 * searched recursively.
 *
 * @param node - Node whose descendants are inspected.
 * @returns true when at least one non-decorative element descendant exists.
 */
function hasMeaningfulStableDescendant(node: DOMNode): boolean {
  return (node.children || []).some((child) => {
    if (child.nodeType !== NODE_TYPE_ELEMENT) return false;

    const tag = child.localName || child.nodeName.toLowerCase();
    const attrs = parseAttributes(child.attributes);
    const decorative = isDecorativeMedia(tag, attrs, isInteractive(tag, attrs));

    // A non-decorative child is meaningful by itself; a decorative one may
    // still wrap something meaningful deeper down.
    return !decorative || hasMeaningfulStableDescendant(child);
  });
}
|
|
||
| function isDecorativeMedia(tagName: string, attrMap: Map<string, string>, interactive: boolean): boolean { | ||
| if (interactive) return false; | ||
| if (!['img', 'picture', 'source', 'video', 'canvas'].includes(tagName)) return false; | ||
| if ( | ||
| attrMap.has('alt') || | ||
| attrMap.has('title') || | ||
| attrMap.has('aria-label') || | ||
| attrMap.has('role') || | ||
| attrMap.has('data-testid') || | ||
| attrMap.has('controls') || | ||
| attrMap.has('tabindex') | ||
|
Comment on lines
+250
to
+254
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new stable-profile decorative check only preserves media when Useful? React with 👍 / 👎. |
||
| ) return false; | ||
|
Comment on lines
+248
to
+255
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In Useful? React with 👍 / 👎. |
||
| return true; | ||
| } | ||
|
|
||
/**
 * Node-level wrapper around isDecorativeMedia: true only for an element node
 * that is decorative media itself and has no meaningful element descendants.
 *
 * @param node - Node to classify.
 * @returns true when the node and its whole subtree are decorative media.
 */
function isDecorativeMediaNode(node: DOMNode): boolean {
  if (node.nodeType !== NODE_TYPE_ELEMENT) return false;

  const tag = node.localName || node.nodeName.toLowerCase();
  const attrs = parseAttributes(node.attributes);
  const interactive = isInteractive(tag, attrs);

  if (!isDecorativeMedia(tag, attrs, interactive)) return false;
  return !hasMeaningfulStableDescendant(node);
}
|
|
||
| function formatElement( | ||
| node: DOMNode, | ||
| attrMap: Map<string, string>, | ||
| indent: string, | ||
| textContent: string, | ||
| interactive: boolean, | ||
| hints?: string, | ||
| planningProfile: 'default' | 'stable' = 'default', | ||
| referencedIds: Set<string> = new Set(), | ||
| ): string { | ||
| const tagName = node.localName || node.nodeName.toLowerCase(); | ||
|
|
||
| // Build attribute string with only kept attrs | ||
| const attrParts: string[] = []; | ||
| for (const [k, v] of attrMap) { | ||
| if (KEEP_ATTRS.has(k)) { | ||
| if (KEEP_ATTRS.has(k) || (planningProfile === 'stable' && k === 'controls')) { | ||
| if ( | ||
| planningProfile === 'stable' | ||
| && isVolatileStableAttr(k, v) | ||
| && !(k === 'id' && referencedIds.has(v)) | ||
| ) continue; | ||
| attrParts.push(`${k}="${escapeAttributeValue(v)}"`); | ||
| } | ||
| } | ||
|
|
@@ -323,6 +406,8 @@ interface SerializeContext { | |
| interactiveOnly: boolean; | ||
| compression: 'none' | 'light' | 'aggressive'; | ||
| includeUserAgentShadowDOM: boolean; | ||
| planningProfile: 'default' | 'stable'; | ||
| referencedIds: Set<string>; | ||
| nodesVisited: number; | ||
| maxNodes: number; | ||
| customInteractiveHints: Map<string, string>; | ||
|
|
@@ -411,6 +496,23 @@ function serializeNode( | |
| const customHints = ctx.customInteractiveHints.get(path); | ||
| const interactive = isInteractive(tagName, attrMap, customHints); | ||
|
|
||
| if (ctx.planningProfile === 'stable' && isDecorativeMedia(tagName, attrMap, interactive)) { | ||
| const fallbackText = getDirectTextContent(node); | ||
| const indent = ' '.repeat(depth); | ||
| if (fallbackText) { | ||
| const line = formatElement(node, attrMap, indent, fallbackText, interactive, customHints, ctx.planningProfile, ctx.referencedIds); | ||
| if (!appendBoundedLine(ctx, line + '\n')) return; | ||
| ctx.emittedBackendNodeIds.add(node.backendNodeId); | ||
|
Comment on lines
+502
to
+505
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In Useful? React with 👍 / 👎. |
||
| } | ||
| // Omit decorative media wrappers without fallback text, but still inspect | ||
| // descendants so meaningful labels inside <picture> survive. | ||
| for (const child of node.children || []) { | ||
| serializeNode(child, depth + 1, ctx); | ||
|
Comment on lines
+499
to
+510
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When Useful? React with 👍 / 👎. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Pass the child path into this recursive call; otherwise it falls back to the default Useful? React with 👍 / 👎. |
||
| if (ctx.truncated) return; | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| const indent = ' '.repeat(depth); | ||
|
|
||
| // Container chain collapse (only in non-'none' compression mode, non-interactive containers) | ||
|
|
@@ -429,7 +531,7 @@ function serializeNode( | |
| const leafHints = ctx.customInteractiveHints.get(leafPath); | ||
| const leafInteractive = isInteractive(leafTag, leafAttrMap, leafHints); | ||
| const leafText = getDirectTextContent(leaf); | ||
| const leafLine = formatElement(leaf, leafAttrMap, '', leafText, leafInteractive, leafHints); | ||
| const leafLine = formatElement(leaf, leafAttrMap, '', leafText, leafInteractive, leafHints, ctx.planningProfile, ctx.referencedIds); | ||
| const fullLine = `${indent}${chainPrefix}${leafLine}\n`; | ||
|
|
||
| if (ctx.totalChars + fullLine.length > ctx.maxOutputChars) { | ||
|
|
@@ -459,7 +561,7 @@ function serializeNode( | |
|
|
||
| if (!ctx.interactiveOnly || interactive) { | ||
| const textContent = getDirectTextContent(node); | ||
| const line = formatElement(node, attrMap, indent, textContent, interactive, customHints); | ||
| const line = formatElement(node, attrMap, indent, textContent, interactive, customHints, ctx.planningProfile, ctx.referencedIds); | ||
| const lineWithNewline = line + '\n'; | ||
|
|
||
| if (ctx.totalChars + lineWithNewline.length > ctx.maxOutputChars) { | ||
|
|
@@ -530,6 +632,13 @@ function serializeNode( | |
| for (const group of groups) { | ||
| if (ctx.truncated) return; | ||
|
|
||
| if (ctx.planningProfile === 'stable' && group.nodes.every(isDecorativeMediaNode)) { | ||
| // A purely decorative media run contributes no planning signal. Skip it | ||
| // as a group instead of visiting every omitted leaf and exhausting the | ||
| // serializer node budget on ad/image-heavy pages. | ||
| continue; | ||
|
Comment on lines
+635
to
+639
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The stable-mode sibling fast-path skips an entire group when Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| // Skip dedup for groups containing interactive elements to avoid | ||
| // hiding clickable buttons/links/inputs from the LLM | ||
| const groupHasInteractive = group.nodes.some(n => containsInteractive(n, childPaths.get(n) ?? path, ctx)); | ||
|
|
@@ -715,6 +824,7 @@ export async function serializeDOM( | |
| const interactiveOnly = (options?.interactiveOnly ?? false) || options?.filter === 'interactive'; | ||
| const compression = options?.compression ?? 'light'; // default to 'light' | ||
| const includeUserAgentShadowDOM = options?.includeUserAgentShadowDOM ?? false; | ||
| const planningProfile = options?.planningProfile ?? 'default'; | ||
|
|
||
| // Get page stats via page.evaluate | ||
| const pageStats = await withTimeout( | ||
|
|
@@ -765,6 +875,11 @@ export async function serializeDOM( | |
| { depth: documentDepth, pierce: true }, | ||
| ); | ||
|
|
||
| const referencedIds = new Set<string>(); | ||
| if (planningProfile === 'stable') { | ||
| collectReferencedIds(root, referencedIds); | ||
|
Comment on lines
+879
to
+880
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| const ctx: SerializeContext = { | ||
| lines: [], | ||
| totalChars: 0, | ||
|
|
@@ -775,6 +890,8 @@ export async function serializeDOM( | |
| interactiveOnly, | ||
| compression, | ||
| includeUserAgentShadowDOM, | ||
| planningProfile, | ||
| referencedIds, | ||
| nodesVisited: 0, | ||
| maxNodes: DEFAULT_MAX_SERIALIZER_NODES, | ||
| customInteractiveHints, | ||
|
|
@@ -787,6 +904,10 @@ export async function serializeDOM( | |
| appendBoundedLine(ctx, statsLine); | ||
| } | ||
|
|
||
| if (includePageStats && planningProfile === 'stable' && !ctx.truncated) { | ||
| appendBoundedLine(ctx, '[planning_profile] stable\n\n'); | ||
| } | ||
|
|
||
| // Serialize from root | ||
| if (!ctx.truncated) { | ||
| serializeNode(root, 0, ctx); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -129,6 +129,11 @@ const definition: MCPToolDefinition = { | |
| enum: ['none', 'delta'], | ||
| description: 'Compression mode. "delta" returns only changes since last read.', | ||
| }, | ||
| planningProfile: { | ||
| type: 'string', | ||
| enum: ['default', 'stable'], | ||
| description: 'DOM mode only: stable omits decorative/noisy serialization details without mutating the live page. Default: default.', | ||
| }, | ||
| fallback: { | ||
| type: 'string', | ||
| enum: ['none', 'dom'], | ||
|
|
@@ -680,10 +685,12 @@ const handler: ToolHandler = async ( | |
| try { | ||
| const refId = args.ref_id as string | undefined; | ||
| const depth = args.depth as number | undefined; | ||
| const planningProfile = (args.planningProfile as 'default' | 'stable' | undefined) ?? 'default'; | ||
| const result = await measure('domGetDocumentMs', () => serializeDOM(page, cdpClient, { | ||
| maxDepth: depth ?? -1, | ||
| filter: filter, | ||
| interactiveOnly: filter === 'interactive', | ||
| planningProfile, | ||
|
Comment on lines
+688
to
+693
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This introduces Useful? React with 👍 / 👎. |
||
| })); | ||
| diagnostics.formatMs = diagnostics.domGetDocumentMs; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The stable profile now drops IDs that look generated unless they appear in
`ID_REFERENCE_ATTRS`, but this allowlist omits ARIA IDREF attributes like `aria-activedescendant` (and similar relationships). In widgets such as comboboxes/listboxes, the active option ID is often generated (`react-aria-*`), so the serializer can emit the relationship attribute while stripping the referenced element's `id`, breaking the reference the model needs to reason about focus/selection state. Useful? React with 👍 / 👎.