Skip to content

Commit 6cc93b2

Browse files
cameroncooke and codex committed
fix(ui-automation): Address Warden follow-up findings
Move the type_text changelog entry to the added section and keep user-facing notes free of internal runtime schema names. Preserve AXe fallback diagnostics when command output is empty and simplify small helper paths flagged by Warden. Co-Authored-By: Codex <noreply@openai.com>
1 parent 18193ec commit 6cc93b2

6 files changed

Lines changed: 47 additions & 36 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,9 @@
77
- Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators.
88
- Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged.
99
- Added `batch` for executing multiple AXe UI automation steps in one simulator session.
10-
- Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique.
11-
- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions.
10+
- Added `wait_for_ui` for polling runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique.
11+
- Added structured element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions.
12+
- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it.
1213

1314
### Fixed
1415

@@ -29,7 +30,6 @@
2930
- Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders.
3031
- Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations.
3132
- Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target.
32-
- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it.
3333
- Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure.
3434
- Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons.
3535
- Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome.

src/mcp/tools/ui-automation/shared/domain-result.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ export function mapAxeCommandError(
183183
if (error instanceof AxeError) {
184184
return {
185185
message: messages.axeFailureMessage(error),
186-
diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])),
186+
diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])),
187187
};
188188
}
189189

src/mcp/tools/ui-automation/shared/semantic-tap.ts

Lines changed: 31 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -64,13 +64,37 @@ function hasDuplicateSelectorMatch(params: {
6464
return matches.length > 1;
6565
}
6666

67+
function pickSemanticTapSelectorArgs(params: {
68+
element: RuntimeSnapshotElementRecord;
69+
elements: readonly RuntimeSnapshotElementRecord[];
70+
elementTypeArgs: readonly string[];
71+
extraArgs: readonly string[];
72+
}): string[] | null {
73+
const { element, elements, elementTypeArgs, extraArgs } = params;
74+
const { identifier, label, value } = element.publicElement;
75+
76+
if (element.publicElement.role === 'switch') return null;
77+
if (
78+
identifier &&
79+
!hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier })
80+
) {
81+
return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs];
82+
}
83+
if (label && !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })) {
84+
return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs];
85+
}
86+
if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) {
87+
return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs];
88+
}
89+
return null;
90+
}
91+
6792
export function createSemanticTapCommand(
6893
element: RuntimeSnapshotElementRecord,
6994
elementRef: string,
7095
extraArgs: readonly string[] = [],
7196
elements: readonly RuntimeSnapshotElementRecord[] = [element],
7297
): SemanticTapCommand {
73-
const { identifier, label, value } = element.publicElement;
7498
const activationPoint = getRuntimeElementActivationPoint(element);
7599
const elementType = axeElementTypeFor(element);
76100
const elementTypeArgs = elementType ? ['--element-type', elementType] : [];
@@ -87,25 +111,12 @@ export function createSemanticTapCommand(
87111
]
88112
: ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs];
89113

90-
const selectorArgs = (() => {
91-
if (element.publicElement.role === 'switch') return null;
92-
if (
93-
identifier &&
94-
!hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier })
95-
) {
96-
return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs];
97-
}
98-
if (
99-
label &&
100-
!hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })
101-
) {
102-
return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs];
103-
}
104-
if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) {
105-
return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs];
106-
}
107-
return null;
108-
})();
114+
const selectorArgs = pickSemanticTapSelectorArgs({
115+
element,
116+
elements,
117+
elementTypeArgs,
118+
extraArgs,
119+
});
109120

110121
return {
111122
selectorArgs,

src/snapshot-tests/json-normalize.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,15 +249,14 @@ function normalizeXcodeBridgeCallEnvelope(
249249
return envelope;
250250
}
251251

252-
const normalizedEnvelope: StructuredOutputEnvelope<unknown> = {
252+
return {
253253
...envelope,
254254
data: {
255255
...data,
256256
content: [],
257257
...(Object.hasOwn(data, 'structuredContent') ? { structuredContent: {} } : {}),
258258
},
259259
};
260-
return normalizedEnvelope;
261260
}
262261

263262
export function normalizeStructuredEnvelope(

src/utils/responses/next-step-formatting.ts

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,7 @@ function formatNextStepForCli(step: NextStep): string {
6767
}
6868

6969
function formatMcpValue(value: NextStepParamValue): string {
70-
if (typeof value === 'string') {
71-
return JSON.stringify(value);
72-
}
73-
if (typeof value === 'object' && value !== null) {
70+
if (typeof value === 'string' || (typeof value === 'object' && value !== null)) {
7471
return JSON.stringify(value);
7572
}
7673
return String(value);

src/utils/structured-output-envelope.ts

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -190,12 +190,16 @@ function compactRuntimeElementRow(element: RuntimeElementV1, action: string): st
190190
}
191191

192192
function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' {
193-
return (
194-
(element.actions.includes('typeText') && 'typeText') ||
195-
(element.actions.includes('tap') && 'tap') ||
196-
(element.actions.includes('swipeWithin') && 'swipeWithin') ||
197-
'none'
198-
);
193+
if (element.actions.includes('typeText')) {
194+
return 'typeText';
195+
}
196+
if (element.actions.includes('tap')) {
197+
return 'tap';
198+
}
199+
if (element.actions.includes('swipeWithin')) {
200+
return 'swipeWithin';
201+
}
202+
return 'none';
199203
}
200204

201205
function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean {

0 commit comments

Comments
 (0)