Skip to content

Commit 89358fa

Browse files
cameroncookecodex
andcommitted
fix(ui-automation): Tighten runtime snapshot review issues
Keep next-step guidance available for unchanged runtime snapshots, cap compact runtime snapshot rows, and simplify runtime snapshot rendering branches called out by review. Add direct semantic tap helper coverage for selector choice, duplicate selector fallback, switch touch batching, and recoverable AXe fallback. Co-Authored-By: Codex <noreply@openai.com>
1 parent 16c363a commit 89358fa

6 files changed

Lines changed: 279 additions & 49 deletions

File tree

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import { describe, expect, it } from 'vitest';
2+
import { mockProcess } from '../../../../test-utils/mock-executors.ts';
3+
import type { CommandExecutor } from '../../../../utils/execution/index.ts';
4+
import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts';
5+
import {
6+
createSemanticTapBatchSteps,
7+
createSemanticTapCommand,
8+
executeSemanticTapWithAmbiguityFallback,
9+
isRecoverableAxeSelectorError,
10+
} from '../shared/semantic-tap.ts';
11+
import {
12+
createMockAxeHelpers,
13+
createNode,
14+
createSequencedExecutor,
15+
simulatorId,
16+
} from './ui-action-test-helpers.ts';
17+
18+
function createElements(nodes = [createNode()]) {
19+
return createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: 1_000 }).elements;
20+
}
21+
22+
describe('semantic tap helpers', () => {
23+
it('recognizes recoverable AXe selector failures', () => {
24+
expect(
25+
isRecoverableAxeSelectorError(
26+
new Error('Multiple (2) accessibility elements matched selector'),
27+
),
28+
).toBe(true);
29+
expect(
30+
isRecoverableAxeSelectorError({
31+
axeOutput: 'No accessibility element matched --label Continue',
32+
}),
33+
).toBe(true);
34+
expect(isRecoverableAxeSelectorError(new Error('Simulator is not booted'))).toBe(false);
35+
});
36+
37+
it('uses a unique semantic selector before coordinates', () => {
38+
const [element] = createElements([
39+
createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }),
40+
]);
41+
42+
const command = createSemanticTapCommand(element!, 'e1', ['--duration', '0.1'], [element!]);
43+
44+
expect(command.selectorArgs).toEqual([
45+
'tap',
46+
'--id',
47+
'continue.button',
48+
'--element-type',
49+
'Button',
50+
'--duration',
51+
'0.1',
52+
]);
53+
expect(command.primaryArgs).toBe(command.selectorArgs);
54+
expect(command.usedSelector).toBe(true);
55+
});
56+
57+
it('falls back to coordinates when semantic selectors are duplicated', () => {
58+
const elements = createElements([
59+
createNode({ AXUniqueId: 'duplicate.button', AXLabel: 'Duplicate' }),
60+
createNode({
61+
AXUniqueId: 'duplicate.button',
62+
AXLabel: 'Duplicate',
63+
frame: { x: 20, y: 80, width: 100, height: 40 },
64+
}),
65+
]);
66+
67+
const command = createSemanticTapCommand(elements[0]!, 'e1', [], elements);
68+
69+
expect(command.selectorArgs).toBeNull();
70+
expect(command.primaryArgs).toEqual(['tap', '-x', '60', '-y', '40']);
71+
expect(command.usedSelector).toBe(false);
72+
});
73+
74+
it('represents switch taps as down/up touch batch steps', () => {
75+
const [element] = createElements([
76+
createNode({
77+
type: 'Switch',
78+
role: 'AXSwitch',
79+
AXLabel: 'Alerts',
80+
frame: { x: 10, y: 20, width: 200, height: 40 },
81+
}),
82+
]);
83+
84+
const command = createSemanticTapCommand(element!, 'e1');
85+
86+
expect(command.selectorArgs).toBeNull();
87+
expect(command.coordinateArgs).toEqual(['touch', '-x', '158', '-y', '40', '--down', '--up']);
88+
expect(createSemanticTapBatchSteps(command)).toEqual([
89+
'touch -x 158 -y 40 --down',
90+
'touch -x 158 -y 40 --up',
91+
]);
92+
});
93+
94+
it('retries recoverable selector failures with coordinates', async () => {
95+
const [element] = createElements([
96+
createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }),
97+
]);
98+
const command = createSemanticTapCommand(element!, 'e1', [], [element!]);
99+
const { calls, executor } = createSequencedExecutor([
100+
{ success: false, error: 'Multiple (2) accessibility elements matched selector' },
101+
{ success: true, output: 'ok' },
102+
]);
103+
104+
await executeSemanticTapWithAmbiguityFallback({
105+
command,
106+
simulatorId,
107+
executor,
108+
axeHelpers: createMockAxeHelpers(),
109+
});
110+
111+
expect(calls.map((call) => call.command.slice(1, -2))).toEqual([
112+
['tap', '--id', 'continue.button', '--element-type', 'Button'],
113+
['tap', '-x', '60', '-y', '40'],
114+
]);
115+
});
116+
117+
it('does not retry unrecoverable selector failures', async () => {
118+
const [element] = createElements([
119+
createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }),
120+
]);
121+
const command = createSemanticTapCommand(element!, 'e1', [], [element!]);
122+
const calls: string[][] = [];
123+
const executor: CommandExecutor = async (commandArgs) => {
124+
calls.push(commandArgs);
125+
return { success: false, output: '', error: 'Simulator is not booted', process: mockProcess };
126+
};
127+
128+
await expect(
129+
executeSemanticTapWithAmbiguityFallback({
130+
command,
131+
simulatorId,
132+
executor,
133+
axeHelpers: createMockAxeHelpers(),
134+
}),
135+
).rejects.toThrow("axe command 'tap' failed.");
136+
expect(calls).toHaveLength(1);
137+
});
138+
});

src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,29 @@ describe('Snapshot UI Plugin', () => {
239239
seq: 2,
240240
});
241241
expect(getRuntimeSnapshot('12345678-1234-4234-8234-123456789012')?.seq).toBe(2);
242-
expect(second.ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined();
242+
expect(second.ctx.nextSteps).toEqual([
243+
{
244+
label: 'Refresh after layout changes',
245+
tool: 'snapshot_ui',
246+
params: { simulatorId: '12345678-1234-4234-8234-123456789012' },
247+
},
248+
{
249+
label: 'Wait for UI to settle',
250+
tool: 'wait_for_ui',
251+
params: {
252+
simulatorId: '12345678-1234-4234-8234-123456789012',
253+
predicate: 'settled',
254+
},
255+
},
256+
{
257+
label: 'Tap an elementRef',
258+
tool: 'tap',
259+
params: {
260+
simulatorId: '12345678-1234-4234-8234-123456789012',
261+
elementRef: 'e1',
262+
},
263+
},
264+
]);
243265
});
244266

245267
it('should return full runtime snapshot when sinceScreenHash differs from the current screen hash', async () => {

src/mcp/tools/ui-automation/snapshot_ui.ts

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ import {
1111
getHandlerContext,
1212
toInternalSchema,
1313
} from '../../../utils/typed-tool-factory.ts';
14-
import { recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts';
14+
import { getRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts';
1515
import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts';
1616
import type { AxeHelpers } from './shared/axe-command.ts';
1717
import type { CaptureResultDomainResult } from '../../../types/domain-results.ts';
1818
import type { NonStreamingExecutor } from '../../../types/tool-execution.ts';
19+
import type { RuntimeSnapshotV1 } from '../../../types/ui-snapshot.ts';
1920
import { createRuntimeSnapshotNextSteps } from './shared/runtime-next-steps.ts';
2021
import {
2122
createCaptureFailureResult,
@@ -137,17 +138,21 @@ export async function snapshot_uiLogic(
137138

138139
setCaptureStructuredOutput(ctx, result);
139140

140-
if (
141-
!result.didError &&
142-
result.capture &&
143-
'type' in result.capture &&
144-
result.capture.type === 'runtime-snapshot'
145-
) {
146-
ctx.nextSteps = createRuntimeSnapshotNextSteps({
147-
simulatorId: params.simulatorId,
148-
runtimeSnapshot: result.capture,
149-
includeRefreshAndWait: true,
150-
});
141+
if (!result.didError && result.capture && 'type' in result.capture) {
142+
let runtimeSnapshot: RuntimeSnapshotV1 | undefined;
143+
if (result.capture.type === 'runtime-snapshot') {
144+
runtimeSnapshot = result.capture;
145+
} else if (result.capture.type === 'runtime-snapshot-unchanged') {
146+
runtimeSnapshot = getRuntimeSnapshot(params.simulatorId)?.payload;
147+
}
148+
149+
if (runtimeSnapshot) {
150+
ctx.nextSteps = createRuntimeSnapshotNextSteps({
151+
simulatorId: params.simulatorId,
152+
runtimeSnapshot,
153+
includeRefreshAndWait: true,
154+
});
155+
}
151156
}
152157
}
153158

src/utils/__tests__/structured-output-envelope.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,58 @@ describe('toStructuredEnvelope', () => {
138138
});
139139
});
140140

141+
it('caps compact runtime snapshot rows by category', () => {
142+
const targets = Array.from({ length: 80 }, (_, index) => ({
143+
ref: `e${index + 1}`,
144+
role: 'button' as const,
145+
label: `Target ${index + 1}`,
146+
frame: { x: 0, y: index, width: 100, height: 40 },
147+
actions: ['tap' as const],
148+
}));
149+
const scroll = Array.from({ length: 40 }, (_, index) => ({
150+
ref: `e${index + 81}`,
151+
role: 'scroll-view' as const,
152+
label: `Scroll ${index + 1}`,
153+
frame: { x: 0, y: index, width: 390, height: 600 },
154+
actions: ['swipeWithin' as const],
155+
}));
156+
const text = Array.from({ length: 70 }, (_, index) => ({
157+
ref: `e${index + 121}`,
158+
role: 'text' as const,
159+
label: `Text ${index + 1}`,
160+
frame: { x: 0, y: index, width: 100, height: 20 },
161+
state: { visible: true },
162+
actions: ['touch' as const],
163+
}));
164+
const result: CaptureResultDomainResult = {
165+
kind: 'capture-result',
166+
didError: false,
167+
error: null,
168+
summary: { status: 'SUCCEEDED' },
169+
artifacts: { simulatorId: 'SIMULATOR-1' },
170+
capture: {
171+
type: 'runtime-snapshot',
172+
protocol: 'rs/1',
173+
simulatorId: 'SIMULATOR-1',
174+
screenHash: 'large-screen',
175+
seq: 4,
176+
capturedAtMs: 1_000,
177+
expiresAtMs: 61_000,
178+
elements: [...targets, ...scroll, ...text],
179+
actions: [],
180+
},
181+
};
182+
183+
const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2');
184+
const data = envelope.data as {
185+
capture: { targets: string[]; scroll: string[]; text?: string[] };
186+
};
187+
188+
expect(data.capture.targets).toHaveLength(64);
189+
expect(data.capture.scroll).toHaveLength(32);
190+
expect(data.capture.text).toHaveLength(64);
191+
});
192+
141193
it('compacts unchanged runtime snapshot captures by default', () => {
142194
const result: CaptureResultDomainResult = {
143195
kind: 'capture-result',

src/utils/renderers/domain-result-text.ts

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,16 +1249,24 @@ function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElem
12491249
.map(({ element }) => element);
12501250
}
12511251

1252+
function getPrimaryRuntimeElementAction(element: RuntimeElementV1, action?: string): string {
1253+
if (action) {
1254+
return action;
1255+
}
1256+
if (element.actions.includes('typeText')) {
1257+
return 'typeText';
1258+
}
1259+
if (element.actions.includes('tap')) {
1260+
return 'tap';
1261+
}
1262+
if (element.actions.includes('swipeWithin')) {
1263+
return 'swipe';
1264+
}
1265+
return 'none';
1266+
}
1267+
12521268
function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): string {
1253-
const primaryAction =
1254-
action ??
1255-
(element.actions.includes('typeText')
1256-
? 'typeText'
1257-
: element.actions.includes('tap')
1258-
? 'tap'
1259-
: element.actions.includes('swipeWithin')
1260-
? 'swipe'
1261-
: 'none');
1269+
const primaryAction = getPrimaryRuntimeElementAction(element, action);
12621270
return [
12631271
element.ref,
12641272
primaryAction,
@@ -1466,17 +1474,14 @@ function createCaptureResultItems(
14661474
if (result.didError) {
14671475
items.push(...createStandardDiagnosticSections(result.diagnostics));
14681476
items.push(...createUiErrorItems(result.uiError));
1469-
items.push(
1470-
createStatus(
1471-
'error',
1472-
result.error ??
1473-
(isUiHierarchy
1474-
? isRuntimeSnapshot
1475-
? 'Failed to get runtime UI snapshot.'
1476-
: 'Failed to get accessibility hierarchy.'
1477-
: 'Failed to capture screenshot.'),
1478-
),
1479-
);
1477+
let fallbackError = 'Failed to capture screenshot.';
1478+
if (isRuntimeSnapshot) {
1479+
fallbackError = 'Failed to get runtime UI snapshot.';
1480+
} else if (isUiHierarchy) {
1481+
fallbackError = 'Failed to get accessibility hierarchy.';
1482+
}
1483+
1484+
items.push(createStatus('error', result.error ?? fallbackError));
14801485
return items;
14811486
}
14821487

0 commit comments

Comments
 (0)