Skip to content

Commit 1b64eee

Browse files
cameroncookecodex
andcommitted
fix(ui-automation): Resolve remaining review threads
Allow drag to use refs that support either direct touch or swipeWithin so application and window scroll targets are not rejected before drag point selection can run. Keep swipe semantic-only while adding explicit migration guidance for legacy coordinate arguments, and update stale smoke coverage to use runtime refs instead of coordinates. Cache scroll descendant facts during runtime snapshot normalization so scroll inference preserves behavior without repeated full-tree scans. Co-Authored-By: Codex <noreply@openai.com>
1 parent 6b98258 commit 1b64eee

8 files changed

Lines changed: 418 additions & 95 deletions

File tree

src/mcp/tools/ui-automation/__tests__/drag.test.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,44 @@ describe('Drag Tool', () => {
178178
]);
179179
});
180180

181+
it('does not require touch support for swipeWithin-only drag targets', async () => {
182+
recordSnapshot([
183+
createNode({
184+
type: 'Application',
185+
role: 'AXApplication',
186+
frame: { x: 0, y: 0, width: 402, height: 874 },
187+
}),
188+
]);
189+
const snapshot = getRuntimeSnapshot(simulatorId);
190+
snapshot!.elements[0]!.publicElement.actions = ['swipeWithin'];
191+
snapshot!.payload.elements[0]!.actions = ['swipeWithin'];
192+
const { calls, executor } = createTrackingExecutor();
193+
194+
const result = await runDrag({ simulatorId, elementRef: 'e1', direction: 'down' }, executor);
195+
196+
expect(result.action).toMatchObject({
197+
type: 'drag',
198+
elementRef: 'e1',
199+
direction: 'down',
200+
from: { x: 201, y: 131 },
201+
to: { x: 201, y: 743 },
202+
});
203+
expect(calls[0]?.command).toEqual([
204+
'/mocked/axe/path',
205+
'drag',
206+
'--start-x',
207+
'201',
208+
'--start-y',
209+
'131',
210+
'--end-x',
211+
'201',
212+
'--end-y',
213+
'743',
214+
'--udid',
215+
simulatorId,
216+
]);
217+
});
218+
181219
it('uses directional drag points for cell targets instead of in-cell swipe strokes', async () => {
182220
recordSnapshot([
183221
createNode({
@@ -246,6 +284,25 @@ describe('Drag Tool', () => {
246284
expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' });
247285
expect(calls).toEqual([]);
248286
});
287+
288+
it('reports that drag requires touch or swipeWithin support', async () => {
289+
recordSnapshot([createNode()]);
290+
const snapshot = getRuntimeSnapshot(simulatorId);
291+
snapshot!.elements[0]!.publicElement.actions = ['tap'];
292+
snapshot!.payload.elements[0]!.actions = ['tap'];
293+
const { calls, executor } = createTrackingExecutor();
294+
295+
const result = await runDrag({ simulatorId, elementRef: 'e1', direction: 'up' }, executor);
296+
297+
expect(result.didError).toBe(true);
298+
expect(result.uiError).toMatchObject({
299+
code: 'TARGET_NOT_ACTIONABLE',
300+
elementRef: 'e1',
301+
message: "Element ref 'e1' does not support 'touch' or 'swipeWithin'.",
302+
recoveryHint: expect.stringContaining("'touch' or 'swipeWithin'"),
303+
});
304+
expect(calls).toEqual([]);
305+
});
249306
});
250307

251308
describe('Handler Behavior', () => {

src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,141 @@ describe('runtime snapshot normalization', () => {
376376
);
377377
});
378378

379+
it('infers swipeWithin from deeply nested overflowing descendants', () => {
380+
const snapshot = createRuntimeSnapshotRecord({
381+
simulatorId,
382+
uiHierarchy: [
383+
createNode({
384+
type: 'Other',
385+
role: 'AXGroup',
386+
AXLabel: 'Scrollable panel',
387+
frame: { x: 0, y: 0, width: 300, height: 300 },
388+
children: [
389+
createNode({
390+
type: 'Other',
391+
role: 'AXGroup',
392+
frame: { x: 10, y: 10, width: 280, height: 280 },
393+
children: [
394+
createNode({
395+
type: 'Other',
396+
role: 'AXGroup',
397+
frame: { x: 20, y: 20, width: 260, height: 260 },
398+
children: [
399+
createNode({
400+
type: 'StaticText',
401+
role: 'AXStaticText',
402+
AXLabel: 'Overflow',
403+
frame: { x: 30, y: 360, width: 120, height: 20 },
404+
}),
405+
],
406+
}),
407+
],
408+
}),
409+
],
410+
}),
411+
],
412+
nowMs: 1_000,
413+
});
414+
415+
expect(snapshot.payload.elements[0]).toEqual(
416+
expect.objectContaining({
417+
role: 'other',
418+
label: 'Scrollable panel',
419+
actions: expect.arrayContaining(['swipeWithin']),
420+
}),
421+
);
422+
});
423+
424+
it('does not infer root viewport swipeWithin when a sheet grabber is nested', () => {
425+
const root = createNode({
426+
type: 'Application',
427+
role: 'AXApplication',
428+
AXLabel: 'Example',
429+
frame: { x: 0, y: 0, width: 390, height: 844 },
430+
children: [
431+
createNode({
432+
type: 'Other',
433+
role: 'AXGroup',
434+
frame: { x: 0, y: 0, width: 390, height: 120 },
435+
children: [
436+
createNode({
437+
type: 'Button',
438+
role: 'AXButton',
439+
AXLabel: 'Sheet Grabber',
440+
frame: { x: 157, y: 56, width: 76, height: 24 },
441+
}),
442+
],
443+
}),
444+
createNode({
445+
type: 'StaticText',
446+
role: 'AXStaticText',
447+
AXLabel: 'More content below',
448+
frame: { x: 40, y: 920, width: 220, height: 24 },
449+
}),
450+
],
451+
});
452+
453+
const snapshot = createRuntimeSnapshotRecord({
454+
simulatorId,
455+
uiHierarchy: [root],
456+
nowMs: 1_000,
457+
});
458+
459+
expect(snapshot.payload.elements[0]).toEqual(
460+
expect.objectContaining({
461+
role: 'application',
462+
label: 'Example',
463+
actions: [],
464+
}),
465+
);
466+
});
467+
468+
it('does not infer root viewport swipeWithin when a better nested scroll target exists', () => {
469+
const root = createNode({
470+
type: 'Application',
471+
role: 'AXApplication',
472+
AXLabel: 'Example',
473+
frame: { x: 0, y: 0, width: 390, height: 844 },
474+
children: [
475+
createNode({
476+
type: 'Other',
477+
role: 'AXGroup',
478+
frame: { x: 0, y: 100, width: 390, height: 600 },
479+
children: [
480+
createNode({
481+
type: 'ScrollView',
482+
role: 'AXScrollArea',
483+
AXIdentifier: 'app.nestedContentPanel',
484+
frame: { x: 0, y: 100, width: 390, height: 600 },
485+
}),
486+
],
487+
}),
488+
createNode({
489+
type: 'StaticText',
490+
role: 'AXStaticText',
491+
AXLabel: 'Additional details below',
492+
frame: { x: 40, y: 920, width: 220, height: 24 },
493+
}),
494+
],
495+
});
496+
497+
const snapshot = createRuntimeSnapshotRecord({
498+
simulatorId,
499+
uiHierarchy: [root],
500+
nowMs: 1_000,
501+
});
502+
503+
expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin');
504+
expect(
505+
snapshot.payload.elements.find((element) => element.identifier === 'app.nestedContentPanel'),
506+
).toEqual(
507+
expect.objectContaining({
508+
role: 'scroll-view',
509+
actions: expect.arrayContaining(['swipeWithin']),
510+
}),
511+
);
512+
});
513+
379514
it('does not synthesize a foreground sheet scroll region without a real scroll descendant', () => {
380515
const root = createNode({
381516
type: 'Application',

src/mcp/tools/ui-automation/__tests__/swipe.test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,26 @@ describe('Swipe Tool', () => {
351351
expect(result.content[0].text).toContain('simulatorId is required');
352352
});
353353

354+
it('returns migration guidance for removed coordinate parameters', async () => {
355+
sessionStore.setDefaults({ simulatorId });
356+
357+
const result = await callHandler(handler, {
358+
x1: 50,
359+
y1: 100,
360+
x2: 50,
361+
y2: 500,
362+
delta: 10,
363+
});
364+
365+
expect(result.isError).toBe(true);
366+
expect(result.content[0].text).toContain('Coordinate-based swipe parameters');
367+
expect(result.content[0].text).toContain('snapshot_ui');
368+
expect(result.content[0].text).toContain('withinElementRef');
369+
expect(result.content[0].text).toContain('direction');
370+
expect(result.content[0].text).toContain('gesture presets');
371+
expect(result.content[0].text).toContain('x1, y1, x2, y2, delta');
372+
});
373+
354374
it('returns ACTION_FAILED when AXe fails after ref resolution', async () => {
355375
recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]);
356376

src/mcp/tools/ui-automation/drag.ts

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import {
1717
getHandlerContext,
1818
toInternalSchema,
1919
} from '../../../utils/typed-tool-factory.ts';
20-
import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts';
20+
import { clearRuntimeSnapshot, resolveElementRefForAnyAction } from './shared/snapshot-ui-state.ts';
2121
import {
2222
getRuntimeElementDirectionalDragPoints,
2323
getRuntimeElementCenter,
@@ -87,10 +87,20 @@ const publicSchemaObject = z.strictObject(dragSchema.omit({ simulatorId: true }
8787

8888
const LOG_PREFIX = '[AXe]';
8989

90-
function usesWithinElementDragPoints(role: string | undefined): boolean {
90+
function prefersWithinElementDragPoints(role: string | undefined): boolean {
9191
return role === 'application' || role === 'window' || role === 'scroll-view' || role === 'list';
9292
}
9393

94+
function shouldUseWithinElementDragPoints(
95+
actions: readonly string[],
96+
role: string | undefined,
97+
): boolean {
98+
return (
99+
actions.includes('swipeWithin') &&
100+
(!actions.includes('touch') || prefersWithinElementDragPoints(role))
101+
);
102+
}
103+
94104
export function createDragExecutor(
95105
executor: CommandExecutor,
96106
axeHelpers: AxeHelpers = defaultAxeHelpers,
@@ -108,25 +118,26 @@ export function createDragExecutor(
108118
...(steps !== undefined ? { steps } : {}),
109119
};
110120

111-
const resolution = resolveElementRef(simulatorId, elementRef, 'touch');
121+
const resolution = resolveElementRefForAnyAction(simulatorId, elementRef, [
122+
'touch',
123+
'swipeWithin',
124+
]);
112125
if (!resolution.ok) {
113126
return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, {
114127
uiError: resolution.error,
115128
});
116129
}
117130

118131
const viewportFrame = resolution.snapshot.elements[0]?.publicElement.frame;
119-
const { role } = resolution.element.publicElement;
120-
const points =
121-
resolution.element.publicElement.actions.includes('swipeWithin') &&
122-
usesWithinElementDragPoints(role)
123-
? getRuntimeElementSwipePoints(resolution.element, direction, distance)
124-
: getRuntimeElementDirectionalDragPoints(
125-
resolution.element,
126-
direction,
127-
distance,
128-
viewportFrame,
129-
);
132+
const { actions, role } = resolution.element.publicElement;
133+
const points = shouldUseWithinElementDragPoints(actions, role)
134+
? getRuntimeElementSwipePoints(resolution.element, direction, distance)
135+
: getRuntimeElementDirectionalDragPoints(
136+
resolution.element,
137+
direction,
138+
distance,
139+
viewportFrame,
140+
);
130141
if (!points.ok) {
131142
const uiError = createUiAutomationRecoverableError({
132143
code: 'TARGET_NOT_ACTIONABLE',

0 commit comments

Comments
 (0)