fix(ui-automation): Address Warden follow-up findings

cameroncooke · codex · cameroncooke · commit 6cc93b2129eb · 2026-05-19T11:24:53.000+01:00
Move the type_text changelog entry to the added section and keep user-facing notes free of internal runtime schema names.

Preserve AXe fallback diagnostics when command output is empty and simplify small helper paths flagged by Warden.
Co-Authored-By: Codex <noreply@openai.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,8 +7,9 @@
 - Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators.
 - Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged.
 - Added `batch` for executing multiple AXe UI automation steps in one simulator session.
-- Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique.
-- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions.
+- Added `wait_for_ui` for polling runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique.
+- Added structured element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions.
+- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it.
 
 ### Fixed
 
@@ -29,7 +30,6 @@
 - Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders.
 - Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations.
 - Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target.
-- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it.
 - Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure.
 - Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons.
 - Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome.
diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts
@@ -183,7 +183,7 @@ export function mapAxeCommandError(
   if (error instanceof AxeError) {
     return {
       message: messages.axeFailureMessage(error),
-      diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])),
+      diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])),
     };
   }
 
diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts
@@ -64,13 +64,37 @@ function hasDuplicateSelectorMatch(params: {
   return matches.length > 1;
 }
 
+function pickSemanticTapSelectorArgs(params: {
+  element: RuntimeSnapshotElementRecord;
+  elements: readonly RuntimeSnapshotElementRecord[];
+  elementTypeArgs: readonly string[];
+  extraArgs: readonly string[];
+}): string[] | null {
+  const { element, elements, elementTypeArgs, extraArgs } = params;
+  const { identifier, label, value } = element.publicElement;
+
+  if (element.publicElement.role === 'switch') return null;
+  if (
+    identifier &&
+    !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier })
+  ) {
+    return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs];
+  }
+  if (label && !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })) {
+    return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs];
+  }
+  if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) {
+    return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs];
+  }
+  return null;
+}
+
 export function createSemanticTapCommand(
   element: RuntimeSnapshotElementRecord,
   elementRef: string,
   extraArgs: readonly string[] = [],
   elements: readonly RuntimeSnapshotElementRecord[] = [element],
 ): SemanticTapCommand {
-  const { identifier, label, value } = element.publicElement;
   const activationPoint = getRuntimeElementActivationPoint(element);
   const elementType = axeElementTypeFor(element);
   const elementTypeArgs = elementType ? ['--element-type', elementType] : [];
@@ -87,25 +111,12 @@ export function createSemanticTapCommand(
         ]
       : ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs];
 
-  const selectorArgs = (() => {
-    if (element.publicElement.role === 'switch') return null;
-    if (
-      identifier &&
-      !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier })
-    ) {
-      return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs];
-    }
-    if (
-      label &&
-      !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })
-    ) {
-      return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs];
-    }
-    if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) {
-      return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs];
-    }
-    return null;
-  })();
+  const selectorArgs = pickSemanticTapSelectorArgs({
+    element,
+    elements,
+    elementTypeArgs,
+    extraArgs,
+  });
 
   return {
     selectorArgs,
diff --git a/src/snapshot-tests/json-normalize.ts b/src/snapshot-tests/json-normalize.ts
@@ -249,15 +249,14 @@ function normalizeXcodeBridgeCallEnvelope(
     return envelope;
   }
 
-  const normalizedEnvelope: StructuredOutputEnvelope<unknown> = {
+  return {
     ...envelope,
     data: {
       ...data,
       content: [],
       ...(Object.hasOwn(data, 'structuredContent') ? { structuredContent: {} } : {}),
     },
   };
-  return normalizedEnvelope;
 }
 
 export function normalizeStructuredEnvelope(
diff --git a/src/utils/responses/next-step-formatting.ts b/src/utils/responses/next-step-formatting.ts
@@ -67,10 +67,7 @@ function formatNextStepForCli(step: NextStep): string {
 }
 
 function formatMcpValue(value: NextStepParamValue): string {
-  if (typeof value === 'string') {
-    return JSON.stringify(value);
-  }
-  if (typeof value === 'object' && value !== null) {
+  if (typeof value === 'string' || (typeof value === 'object' && value !== null)) {
     return JSON.stringify(value);
   }
   return String(value);
diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts
@@ -190,12 +190,16 @@ function compactRuntimeElementRow(element: RuntimeElementV1, action: string): st
 }
 
 function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' {
-  return (
-    (element.actions.includes('typeText') && 'typeText') ||
-    (element.actions.includes('tap') && 'tap') ||
-    (element.actions.includes('swipeWithin') && 'swipeWithin') ||
-    'none'
-  );
+  if (element.actions.includes('typeText')) {
+    return 'typeText';
+  }
+  if (element.actions.includes('tap')) {
+    return 'tap';
+  }
+  if (element.actions.includes('swipeWithin')) {
+    return 'swipeWithin';
+  }
+  return 'none';
 }
 
 function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean {

Original file line number	Diff line number	Diff line change
`@@ -183,7 +183,7 @@ export function mapAxeCommandError(`
`183`	`183`	`if (error instanceof AxeError) {`
`184`	`184`	`return {`
`185`	`185`	`message: messages.axeFailureMessage(error),`
`186`		`- diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])),`
	`186`	`+ diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])),`
`187`	`187`	`};`
`188`	`188`	`}`
`189`	`189`
Original file line number	Diff line number	Diff line change
`@@ -67,10 +67,7 @@ function formatNextStepForCli(step: NextStep): string {`
`67`	`67`	`}`
`68`	`68`
`69`	`69`	`function formatMcpValue(value: NextStepParamValue): string {`
`70`		`- if (typeof value === 'string') {`
`71`		`- return JSON.stringify(value);`
`72`		`- }`
`73`		`- if (typeof value === 'object' && value !== null) {`
	`70`	`+ if (typeof value === 'string' || (typeof value === 'object' && value !== null)) {`
`74`	`71`	`return JSON.stringify(value);`
`75`	`72`	`}`
`76`	`73`	`return String(value);`