shaun0927 · shaun0927 · May 12, 2026 · May 12, 2026 · May 12, 2026 · chatgpt-codex-connector
diff --git a/README.md b/README.md
@@ -401,13 +401,13 @@ read_page tabId="tab1" mode="dom"
 
 [page_stats] url: https://example.com | title: Example | scroll: 0,0 | viewport: 1920x1080
 
-[142]<input type="search" placeholder="Search..." aria-label="Search"/> ★
-[156]<button type="submit"/>Search ★
-[289]<a href="/home"/>Home ★
+# [142]<input type="search" placeholder="Search..." aria-label="Search"/> ★
+$ [156]<button type="submit"/>Search ★
+@ [289]<a href="/home"/>Home ★
 [352]<h1/>Welcome to Example
 ```
 
-DOM mode outputs `[backendNodeId]` as stable identifiers — they persist for the lifetime of the DOM node, unlike `ref_N` IDs which are cleared on each AX-mode `read_page` call.
+DOM mode outputs `[backendNodeId]` as stable identifiers — they persist for the lifetime of the DOM node, unlike `ref_N` IDs which are cleared on each AX-mode `read_page` call. A compact marker before an identifier describes the action affordance: `#` text input, `@` link, `$` button/control, `%` visual target; a line with no marker (such as `[352]<h1/>` above) has no classified affordance. The marker is display metadata only; pass the identifier itself (`142`, `node_142`, or `ref_N`) to action tools.
 
 ### JavaScript and Shadow DOM
 

diff --git a/src/dom/dom-serializer.ts b/src/dom/dom-serializer.ts
@@ -5,6 +5,7 @@
 import type { Page } from 'puppeteer-core';
 import { MAX_OUTPUT_CHARS, DEFAULT_MAX_SERIALIZER_NODES } from '../config/defaults';
 import { withTimeout } from '../utils/with-timeout';
+import { formatAffordancePrefix } from '../utils/element-affordance';
 
 export interface DOMSerializerOptions {
   maxDepth?: number;                    // default: -1 (unlimited)
@@ -169,7 +170,14 @@ function formatElement(
   const attrStr = attrParts.length > 0 ? ' ' + attrParts.join(' ') : '';
 
   const interactiveMarker = interactive ? ' ★' : '';
-  const line = `${indent}[${node.backendNodeId}]<${tagName}${attrStr}/>${textContent}${interactiveMarker}`;
+  const affordancePrefix = formatAffordancePrefix({
+    tagName,
+    role: attrMap.get('role'),
+    type: attrMap.get('type'),
+    href: attrMap.get('href'),
+    contentEditable: attrMap.get('contenteditable'),
+  });
+  const line = `${indent}${affordancePrefix}[${node.backendNodeId}]<${tagName}${attrStr}/>${textContent}${interactiveMarker}`;
   return line;
 }
 

diff --git a/src/tools/find.ts b/src/tools/find.ts
@@ -14,6 +14,7 @@ import { getCircuitBreaker } from '../utils/ralph/circuit-breaker';
 import { analyzeScreenshot, formatElementMapAsText } from '../vision/screenshot-analyzer';
 import { getVisionMode, trackVisionUsage } from '../vision/config';
 import { detectVisionHints, formatVisionHints } from '../vision/auto-detect';
+import { formatAffordancePrefix } from '../utils/element-affordance';
 
 const definition: MCPToolDefinition = {
   name: 'find',
@@ -114,7 +115,7 @@ const handler: ToolHandler = async (
           );
           const scoreLabel = el.matchLevel === 1 ? '\u2605\u2605\u2605' : el.matchLevel === 2 ? '\u2605\u2605' : '\u2605';
           axOutput.push(
-            `[${refId}] ${el.role}: "${el.name}" at (${Math.round(el.rect.x)}, ${Math.round(el.rect.y)}) ${scoreLabel} [AX]`
+            `${formatAffordancePrefix({ role: el.role })}[${refId}] ${el.role}: "${el.name}" at (${Math.round(el.rect.x)}, ${Math.round(el.rect.y)}) ${scoreLabel} [AX]`
           );
         }
 
@@ -187,7 +188,7 @@ const handler: ToolHandler = async (
         // Include score in output for transparency
         const scoreLabel = el.score >= 100 ? '★★★' : el.score >= 50 ? '★★' : el.score >= 20 ? '★' : '';
         output.push(
-          `[${refId}] ${el.role}: "${el.name}" at (${Math.round(el.rect.x)}, ${Math.round(el.rect.y)}) ${scoreLabel}`.trim()
+          `${formatAffordancePrefix({ role: el.role, tagName: el.tagName, type: el.type })}[${refId}] ${el.role}: "${el.name}" at (${Math.round(el.rect.x)}, ${Math.round(el.rect.y)}) ${scoreLabel}`.trim()
         );
       }
     }

diff --git a/src/utils/element-affordance.ts b/src/utils/element-affordance.ts
@@ -0,0 +1,126 @@
+/**
+ * Compact action-affordance classification for perception output.
+ *
+ * The returned marker is display metadata only. It must be rendered outside
+ * canonical refs/backendNodeIds so existing ref parsers keep working.
+ */
+/** Affordance kinds produced by `classifyElementAffordance`; `'text'` is the fallback kind. */
+export type ElementAffordance =
+  | 'text-input'
+  | 'link'
+  | 'control'
+  | 'visual'
+  | 'text';
+
+/** Single-character display marker for an affordance kind; `''` means no marker is rendered. */
+export type AffordanceMarker = '#' | '@' | '$' | '%' | '';
+
+/**
+ * Element facts the classifier may consult. Every field is optional so callers
+ * can pass whatever subset they have (for example only `role`).
+ */
+export interface ElementAffordanceInput {
+  /** Element tag name; compared after trimming and lower-casing. */
+  tagName?: string | null;
+  /** ARIA role; compared after trimming and lower-casing. */
+  role?: string | null;
+  /** `type` of an `<input>`; only consulted when `tagName` is `input`. */
+  type?: string | null;
+  /** Link target. NOTE(review): accepted here but not read by `classifyElementAffordance`. */
+  href?: string | null;
+  /** `contenteditable` state, either as a boolean or as the raw attribute string. */
+  contentEditable?: boolean | string | null;
+}
+
+// All lookups below are made with trimmed, lower-cased values, so entries must be lower-case.
+
+/** `<input type>` values classified as `text-input`. */
+const TEXT_INPUT_TYPES = new Set([
+  'text',
+  'password',
+  'email',
+  'search',
+  'url',
+  'tel',
+  'number',
+]);
+
+/** ARIA roles classified as `text-input`, whatever the tag name is. */
+const TEXT_INPUT_ROLES = new Set([
+  'textbox',
+  'searchbox',
+]);
+
+/** ARIA roles classified as `link`. */
+const LINK_ROLES = new Set([
+  'link',
+]);
+
+/** ARIA roles classified as `control` (buttons, toggles, pickers, menu and tree items). */
+const CONTROL_ROLES = new Set([
+  'button',
+  'checkbox',
+  'radio',
+  'combobox',
+  'listbox',
+  'menu',
+  'menuitem',
+  'menuitemcheckbox',
+  'menuitemradio',
+  'option',
+  'tab',
+  'switch',
+  'slider',
+  'spinbutton',
+  'treeitem',
+]);
+
+/** ARIA roles classified as `visual`. */
+const VISUAL_ROLES = new Set([
+  'image',
+  'img',
+  'graphics-symbol',
+]);
+
+/**
+ * Canonicalise an optional string for set lookups and comparisons.
+ *
+ * @param value - Raw tag name, role or attribute value; may be absent.
+ * @returns The trimmed, lower-cased value, or `''` when `value` is null or undefined.
+ */
+function normalize(value: string | null | undefined): string {
+  if (value == null) {
+    return '';
+  }
+  const trimmed = value.trim();
+  return trimmed.toLowerCase();
+}
+
+/**
+ * Report whether a `contenteditable` value puts the element in an editable state.
+ *
+ * @param value - `true`/`false`, the raw attribute string, or null/undefined
+ *   when the attribute is absent.
+ * @returns `true` for boolean `true`, for the empty string (a bare
+ *   `contenteditable` attribute), and for the keywords `true` and
+ *   `plaintext-only` (matched case-insensitively, ignoring surrounding
+ *   whitespace); `false` otherwise.
+ */
+function isContentEditable(value: ElementAffordanceInput['contentEditable']): boolean {
+  if (value === true) return true;
+  // null/undefined mean the attribute is absent; boolean false is explicitly not editable.
+  if (typeof value !== 'string') return false;
+  // `<div contenteditable>` carries an empty attribute value, which HTML defines
+  // as the editable state, so it must not be confused with "absent".
+  if (value === '') return true;
+  const state = normalize(value);
+  return state === 'true' || state === 'plaintext-only';
+}
+
+/**
+ * `<input type>` values that render as a non-text widget. Any other
+ * non-hidden value is treated as a text field, matching the browser fallback
+ * for unrecognised types.
+ */
+const NON_TEXT_INPUT_TYPES = new Set([
+  'button',
+  'checkbox',
+  'color',
+  'date',
+  'datetime-local',
+  'file',
+  'image',
+  'month',
+  'radio',
+  'range',
+  'reset',
+  'submit',
+  'time',
+  'week',
+]);
+
+/**
+ * Classify an element by the kind of action it affords.
+ *
+ * Rules are evaluated in order and the first match wins:
+ * 1. editable content or a text-entry role -> `text-input`
+ * 2. `<textarea>` -> `text-input`
+ * 3. `<input>` -> decided by `type` alone (see below); later rules are not consulted
+ * 4. `<a>` or a link role -> `link`
+ * 5. `<button>`, `<select>`, `<details>` or a control role -> `control`
+ * 6. `<img>`, `<canvas>`, `<video>`, `<svg>` or a visual role -> `visual`
+ * 7. anything else -> `text`
+ *
+ * `tagName`, `role` and `type` are compared after trimming and lower-casing.
+ * `input.href` is not consulted: an `<a>` is a link with or without it.
+ *
+ * @param input - Whatever element facts the caller has; all fields optional.
+ * @returns The affordance kind; `'text'` when nothing more specific applies.
+ */
+export function classifyElementAffordance(input: ElementAffordanceInput): ElementAffordance {
+  const tagName = normalize(input.tagName);
+  const role = normalize(input.role);
+  const type = normalize(input.type);
+
+  if (isContentEditable(input.contentEditable) || TEXT_INPUT_ROLES.has(role)) {
+    return 'text-input';
+  }
+
+  if (tagName === 'textarea') {
+    return 'text-input';
+  }
+
+  if (tagName === 'input') {
+    // A missing type defaults to a text field.
+    if (!type || TEXT_INPUT_TYPES.has(type)) return 'text-input';
+    // Hidden inputs are not actionable, so they get no marker.
+    if (type === 'hidden') return 'text';
+    // Unrecognised type values are rendered as text fields by browsers, so
+    // only known widget types are labelled as controls.
+    return NON_TEXT_INPUT_TYPES.has(type) ? 'control' : 'text-input';
+  }
+
+  if (tagName === 'a' || LINK_ROLES.has(role)) {
+    return 'link';
+  }
+
+  if (tagName === 'button' || tagName === 'select' || tagName === 'details' || CONTROL_ROLES.has(role)) {
+    return 'control';
+  }
+
+  if (tagName === 'img' || tagName === 'canvas' || tagName === 'video' || tagName === 'svg' || VISUAL_ROLES.has(role)) {
+    return 'visual';
+  }
+
+  return 'text';
+}
+
+/** Display marker for each affordance kind; `'text'` maps to no marker. */
+const MARKER_BY_AFFORDANCE: Readonly<Record<ElementAffordance, AffordanceMarker>> = {
+  'text-input': '#',
+  link: '@',
+  control: '$',
+  visual: '%',
+  text: '',
+};
+
+/**
+ * Translate an affordance kind into its display marker.
+ *
+ * @param kind - Affordance kind to translate.
+ * @returns `#`, `@`, `$` or `%`, or `''` for the `'text'` kind.
+ */
+export function affordanceMarkerFor(kind: ElementAffordance): AffordanceMarker {
+  return MARKER_BY_AFFORDANCE[kind];
+}
+
+/**
+ * Classify an element and return the marker for the resulting kind.
+ *
+ * @param input - Element facts to classify.
+ * @returns The marker character, or `''` when the element gets no marker.
+ */
+export function getAffordanceMarker(input: ElementAffordanceInput): AffordanceMarker {
+  const kind = classifyElementAffordance(input);
+  return affordanceMarkerFor(kind);
+}
+
+/**
+ * Build the text to place directly in front of an element's identifier.
+ *
+ * @param input - Element facts to classify.
+ * @returns The marker followed by one space, or `''` when there is no marker,
+ *   so unmarked lines keep their original layout.
+ */
+export function formatAffordancePrefix(input: ElementAffordanceInput): string {
+  const marker = getAffordanceMarker(input);
+  if (!marker) {
+    return '';
+  }
+  return marker + ' ';
+}
diff --git a/tests/dom/dom-serializer.test.ts b/tests/dom/dom-serializer.test.ts
@@ -374,6 +374,33 @@ describe('DOM Serializer', () => {
     expect(result.content).toContain('[801]');      // button
   });
 
+  // The marker must be printed in front of the bracketed backendNodeId, never
+  // inside the brackets, so the `[id]` token itself is unchanged.
+  test('renders affordance markers outside backendNodeId tokens', async () => {
+    // One child per marker: search input (#), link (@), button ($), image (%).
+    const affordanceDoc = {
+      nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '',
+      children: [{
+        nodeId: 2, backendNodeId: 2, nodeType: 1, nodeName: 'BODY', localName: 'body',
+        attributes: [],
+        children: [
+          { nodeId: 3, backendNodeId: 810, nodeType: 1, nodeName: 'INPUT', localName: 'input', attributes: ['type', 'search'], children: [] },
+          { nodeId: 4, backendNodeId: 811, nodeType: 1, nodeName: 'A', localName: 'a', attributes: ['href', '/docs'], children: [] },
+          { nodeId: 5, backendNodeId: 812, nodeType: 1, nodeName: 'BUTTON', localName: 'button', attributes: [], children: [] },
+          { nodeId: 6, backendNodeId: 813, nodeType: 1, nodeName: 'IMG', localName: 'img', attributes: ['alt', 'Logo'], children: [] },
+        ],
+      }],
+    };
+
+    const page = createMockPageForDOM();
+    const cdpClient = createMockCDPClientForDOM(affordanceDoc);
+
+    const result = await serializeDOM(page as never, cdpClient as never, { includePageStats: false });
+
+    expect(result.content).toContain('# [810]<input');
+    expect(result.content).toContain('@ [811]<a');
+    expect(result.content).toContain('$ [812]<button');
+    expect(result.content).toContain('% [813]<img');
+    // Guard against the marker leaking inside the identifier brackets.
+    expect(result.content).not.toContain('[#810]');
+  });
+
   test('includes role-based interactive elements', async () => {
     const roleDoc = {
       nodeId: 1, backendNodeId: 1, nodeType: 9, nodeName: '#document', localName: '',

diff --git a/tests/dom/shadow-dom-serializer.test.ts b/tests/dom/shadow-dom-serializer.test.ts
@@ -163,7 +163,7 @@ describe('DOM Serializer - Shadow DOM', () => {
     // shadow children at depth+2 = depth 4 = 8 spaces
     const buttonLine = lines.find(l => l.includes('[2100]<button'));
     expect(buttonLine).toBeDefined();
-    expect(buttonLine!.startsWith('        [2100]<button')).toBe(true);
+    expect(buttonLine!).toMatch(/^        (?:\$ )?\[2100\]<button/);
   });
 
   // 2. Closed shadow root rendering

diff --git a/tests/tools/find.test.ts b/tests/tools/find.test.ts
@@ -27,6 +27,7 @@ describe('FindTool', () => {
   let mockRefIdManager: ReturnType<typeof createMockRefIdManager>;
   let testSessionId: string;
   let testTargetId: string;
+  let mockAXMatches: unknown[];
 
   const getFindHandler = async () => {
     jest.resetModules();
@@ -36,6 +37,11 @@ describe('FindTool', () => {
     jest.doMock('../../src/utils/ref-id-manager', () => ({
       getRefIdManager: () => mockRefIdManager,
     }));
+    jest.doMock('../../src/utils/ax-element-resolver', () => ({
+      resolveElementsByAXTree: jest.fn().mockResolvedValue(mockAXMatches),
+      invalidateAXCache: jest.fn(),
+      clearAXCache: jest.fn(),
+    }));
 
     const { registerFindTool } = await import('../../src/tools/find');
 
@@ -56,6 +62,8 @@ describe('FindTool', () => {
     (getSessionManager as jest.Mock).mockReturnValue(mockSessionManager);
     (getRefIdManager as jest.Mock).mockReturnValue(mockRefIdManager);
 
+    mockAXMatches = [];
+
     testSessionId = 'test-session-123';
     const { targetId, page } = await mockSessionManager.createTarget(testSessionId, 'about:blank');
     testTargetId = targetId;
@@ -98,6 +106,7 @@ describe('FindTool', () => {
 
       expect(page.evaluate).toHaveBeenCalled();
       expect(result.content[0].text).toContain('Found');
+      expect(result.content[0].text).toContain('$ [ref_');
     });
 
     test('finds link by keyword', async () => {
@@ -127,6 +136,7 @@ describe('FindTool', () => {
       }) as { content: Array<{ type: string; text: string }> };
 
       expect(result.content[0].text).toContain('link');
+      expect(result.content[0].text).toContain('@ [ref_');
     });
 
     test('finds input by keyword', async () => {
@@ -157,6 +167,7 @@ describe('FindTool', () => {
       }) as { content: Array<{ type: string; text: string }> };
 
       expect(result.content[0].text).toContain('textbox');
+      expect(result.content[0].text).toContain('# [ref_');
     });
 
     test('finds checkbox by keyword', async () => {
@@ -187,6 +198,7 @@ describe('FindTool', () => {
       }) as { content: Array<{ type: string; text: string }> };
 
       expect(result.content[0].text).toContain('checkbox');
+      expect(result.content[0].text).toContain('$ [ref_');
     });
 
     test('finds element by text content', async () => {
@@ -239,6 +251,31 @@ describe('FindTool', () => {
     });
   });
 
+
+  // Covers the AX-first result path, where only the AX role is passed to the
+  // affordance formatter.
+  describe('AX affordance markers', () => {
+    test('places marker outside canonical ref for AX-first results', async () => {
+      // Returned by the mocked resolveElementsByAXTree; must be assigned before
+      // getFindHandler() registers the module mock.
+      mockAXMatches = [{
+        backendDOMNodeId: 88001,
+        role: 'link',
+        name: 'Docs',
+        matchLevel: 1,
+        rect: { x: 12, y: 34, width: 100, height: 20 },
+        properties: {},
+        source: 'ax',
+      }];
+
+      const handler = await getFindHandler();
+      const result = await handler(testSessionId, {
+        tabId: testTargetId,
+        query: 'Docs link',
+      }) as { content: Array<{ type: string; text: string }> };
+
+      expect(result.content[0].text).toContain('[via AX tree]');
+      expect(result.content[0].text).toContain('@ [ref_');
+      // The marker must never end up inside the ref brackets.
+      expect(result.content[0].text).not.toContain('[@ref_');
+    });
+  });
+
   describe('Result Limiting', () => {
     test('returns max 20 elements', async () => {
       const handler = await getFindHandler();

diff --git a/tests/utils/element-affordance.test.ts b/tests/utils/element-affordance.test.ts
@@ -0,0 +1,32 @@
+/// <reference types="jest" />
+
+import { classifyElementAffordance, formatAffordancePrefix, getAffordanceMarker } from '../../src/utils/element-affordance';
+
+// Unit tests for the pure classifier: no DOM, page or CDP mocks are involved.
+describe('element affordance classifier', () => {
+  // Each row: [classifier input, expected affordance kind, expected rendered prefix].
+  test.each([
+    [{ tagName: 'input', type: 'text' }, 'text-input', '# '],
+    [{ tagName: 'input', type: 'search' }, 'text-input', '# '],
+    [{ tagName: 'textarea' }, 'text-input', '# '],
+    [{ role: 'textbox' }, 'text-input', '# '],
+    [{ tagName: 'div', contentEditable: 'true' }, 'text-input', '# '],
+    [{ tagName: 'a', href: '/home' }, 'link', '@ '],
+    [{ role: 'link' }, 'link', '@ '],
+    [{ tagName: 'button' }, 'control', '$ '],
+    [{ tagName: 'input', type: 'checkbox' }, 'control', '$ '],
+    [{ role: 'combobox' }, 'control', '$ '],
+    [{ tagName: 'img' }, 'visual', '% '],
+    [{ role: 'image' }, 'visual', '% '],
+    [{ tagName: 'p' }, 'text', ''],
+  ])('classifies %o as %s', (input, expectedKind, expectedPrefix) => {
+    expect(classifyElementAffordance(input)).toBe(expectedKind);
+    expect(formatAffordancePrefix(input)).toBe(expectedPrefix);
+  });
+
+  // Hidden inputs classify as plain text, so they render without a marker.
+  test('does not mark hidden inputs as actionable', () => {
+    expect(getAffordanceMarker({ tagName: 'input', type: 'hidden' })).toBe('');
+  });
+
+  // Only the marker is checked; the classifier takes no value field.
+  test('treats password fields as text-insertable markers without exposing values', () => {
+    expect(getAffordanceMarker({ tagName: 'input', type: 'password' })).toBe('#');
+  });
+});