Skip to content

Commit e8d5a06

Browse files
cameroncookecodex
andcommitted
fix(tests): Address Warden runtime feedback
Tighten snapshot harness error-state checks, keep error envelopes compatible with the error schema, and simplify the response helper cleanup Warden flagged. Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 3d388b8 commit e8d5a06

12 files changed

Lines changed: 96 additions & 24 deletions

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
### Added
66

7-
- Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints.
7+
- Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators.
88

99
## [2.5.2]
1010

src/snapshot-tests/__tests__/json-harness-error-state.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ describe('JSON snapshot harness error state', () => {
2323
expect(resolveCliJsonSnapshotErrorState(1, errorEnvelope, 'tool')).toBe(true);
2424
});
2525

26+
it('rejects null CLI process status', () => {
27+
expect(() => resolveCliJsonSnapshotErrorState(null, successEnvelope, 'tool')).toThrow(
28+
'CLI process exit status was null for tool; the process may have timed out or been killed by a signal.',
29+
);
30+
});
31+
2632
it('rejects CLI process status and envelope.didError disagreement', () => {
2733
expect(() => resolveCliJsonSnapshotErrorState(1, successEnvelope, 'tool')).toThrow(
2834
'CLI process exit status (1) disagrees with envelope.didError (false)',
@@ -44,5 +50,8 @@ describe('JSON snapshot harness error state', () => {
4450
expect(() => resolveMcpSnapshotErrorState(false, true, 'tool')).toThrow(
4551
'MCP result.isError (false) disagrees with structuredContent.didError (true)',
4652
);
53+
expect(() => resolveMcpSnapshotErrorState(undefined, true, 'tool')).toThrow(
54+
'MCP result.isError (undefined) disagrees with structuredContent.didError (true)',
55+
);
4756
});
4857
});

src/snapshot-tests/harness.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,20 @@ export function resolveCliJsonSnapshotErrorState(
7373
envelope: NonNullable<SnapshotResult['structuredEnvelope']>,
7474
label: string,
7575
): boolean {
76+
if (status === null) {
77+
throw new Error(
78+
`CLI process exit status was null for ${label}; the process may have timed out or been killed by a signal.`,
79+
);
80+
}
81+
7682
const processDidError = status !== 0;
7783
if (processDidError !== envelope.didError) {
7884
throw new Error(
7985
`${label}: CLI process exit status (${status ?? 'null'}) disagrees with envelope.didError (${envelope.didError}).`,
8086
);
8187
}
8288

83-
return processDidError || envelope.didError;
89+
return processDidError;
8490
}
8591

8692
export async function createSnapshotHarness(

src/snapshot-tests/json-normalize.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@ function isRecord(value: unknown): value is Record<string, unknown> {
55
return value !== null && typeof value === 'object';
66
}
77

8+
function normalizeBaseString(value: string): string {
9+
const normalized = normalizeSnapshotOutput(value.replace(/\u00A0/g, ' '));
10+
return normalized.endsWith('\n') ? normalized.slice(0, -1) : normalized;
11+
}
12+
813
function normalizeString(value: string, key?: string, path: string[] = []): string {
914
const parentKey = path.at(-2);
10-
const normalized = normalizeSnapshotOutput(value.replace(/\u00A0/g, ' '));
11-
let result = normalized.endsWith('\n') ? normalized.slice(0, -1) : normalized;
15+
let result = normalizeBaseString(value);
1216

1317
if (parentKey === 'stderr') {
1418
result = result.replace(/^\[\d+\/\d+\] /, '[<STEP>] ');
@@ -131,10 +135,8 @@ function normalizeBuildSettingsEntryValue(key: string, value: string): string {
131135
case 'TARGET_DEVICE_OS_VERSION':
132136
case 'ASSETCATALOG_FILTER_FOR_DEVICE_OS_VERSION':
133137
return '<OS_VERSION>';
134-
default: {
135-
const normalized = normalizeSnapshotOutput(value.replace(/\u00A0/g, ' '));
136-
return normalized.endsWith('\n') ? normalized.slice(0, -1) : normalized;
137-
}
138+
default:
139+
return normalizeBaseString(value);
138140
}
139141
}
140142

@@ -252,10 +254,13 @@ export function normalizeStructuredEnvelope(
252254
) as StructuredOutputEnvelope<unknown>;
253255
}
254256

257+
const FRAME_OBJECT_REGEX =
258+
/"frame": \{\n\s+"y": (?<y>\d+(?:\.\d+)?),\n\s+"x": (?<x>\d+(?:\.\d+)?),\n\s+"width": (?<width>\d+(?:\.\d+)?),\n\s+"height": (?<height>\d+(?:\.\d+)?)\n\s+\}/g;
259+
255260
function compactFrameObjects(json: string): string {
256261
return json.replace(
257-
/"frame": \{\n\s+"y": (\d+(?:\.\d+)?),\n\s+"x": (\d+(?:\.\d+)?),\n\s+"width": (\d+(?:\.\d+)?),\n\s+"height": (\d+(?:\.\d+)?)\n\s+\}/g,
258-
'"frame": { "x": $2, "y": $1, "width": $3, "height": $4 }',
262+
FRAME_OBJECT_REGEX,
263+
'"frame": { "x": $<x>, "y": $<y>, "width": $<width>, "height": $<height> }',
259264
);
260265
}
261266

src/snapshot-tests/mcp-harness.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,11 @@ export function resolveMcpSnapshotErrorState(
9494
const didTransportError = transportDidError ?? false;
9595
if (envelopeDidError !== undefined && didTransportError !== envelopeDidError) {
9696
throw new Error(
97-
`MCP result.isError (${didTransportError}) disagrees with structuredContent.didError (${envelopeDidError}) for ${label}.`,
97+
`MCP result.isError (${String(transportDidError)}) disagrees with structuredContent.didError (${envelopeDidError}) for ${label}.`,
9898
);
9999
}
100100

101-
return didTransportError || envelopeDidError === true;
101+
return didTransportError;
102102
}
103103

104104
export async function createMcpSnapshotHarness(

src/snapshot-tests/normalize.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ const BUILD_SETTINGS_GROUP_REGEX = /^(\s*(?:ALTERNATE_GROUP|GROUP|INSTALL_GROUP)
5454
const BUILD_SETTINGS_GID_REGEX = /^(\s*GID = )\d+$/gm;
5555
const SDK_PATH_REGEX =
5656
/^(\s*(?:CORRESPONDING_SIMULATOR_SDK_DIR|SDKROOT|SDK_DIR(?:_[A-Za-z0-9_]+)?) = ).+$/gm;
57+
const SDK_DIR_PLACEHOLDER_KEY_REGEX = /^(\s*)SDK_DIR_[A-Za-z0-9_]+ = <SDK_PATH>$/gm;
5758
const SDK_NAME_REGEX = /^(\s*(?:CORRESPONDING_SIMULATOR_SDK_NAME|SDK_NAMES?) = ).+$/gm;
5859
const SDK_BUILD_VERSION_REGEX =
5960
/^(\s*(?:PLATFORM_PRODUCT_BUILD_VERSION|SDK_PRODUCT_BUILD_VERSION|MAC_OS_X_PRODUCT_BUILD_VERSION) = ).+$/gm;
@@ -230,7 +231,7 @@ export function normalizeSnapshotOutput(text: string): string {
230231
normalized = normalized.replace(BUILD_SETTINGS_GID_REGEX, '$1<GID>');
231232
normalized = normalized.replace(SDK_PATH_REGEX, '$1<SDK_PATH>');
232233
normalized = normalized.replace(
233-
/^(\s*)SDK_DIR_[A-Za-z0-9_]+ = <SDK_PATH>$/gm,
234+
SDK_DIR_PLACEHOLDER_KEY_REGEX,
234235
'$1SDK_DIR_<SDK_NAME> = <SDK_PATH>',
235236
);
236237
normalized = normalized.replace(SDK_NAME_REGEX, '$1<SDK_NAME>');

src/utils/__tests__/structured-output-envelope.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,33 @@ describe('toStructuredEnvelope', () => {
7171
).toEqual(expectedEnvelope);
7272
});
7373

74+
it('does not serialize next steps on error envelopes because the error schema has no nextSteps field', () => {
75+
const result: BuildResultDomainResult = {
76+
kind: 'build-result',
77+
didError: true,
78+
error: 'Build failed',
79+
};
80+
81+
expect(
82+
toStructuredEnvelope(result, 'xcodebuildmcp.output.error', '1', {
83+
nextSteps: [
84+
{
85+
label: 'Retry build',
86+
cliTool: 'build',
87+
workflow: 'project',
88+
params: { scheme: 'CalculatorApp' },
89+
},
90+
],
91+
}),
92+
).toEqual({
93+
schema: 'xcodebuildmcp.output.error',
94+
schemaVersion: '1',
95+
didError: true,
96+
error: 'Build failed',
97+
data: null,
98+
});
99+
});
100+
74101
it('serializes next steps as rendered CLI command lines by default sorted by priority', () => {
75102
const result: DeviceListDomainResult = {
76103
kind: 'device-list',

src/utils/renderers/cli-text-renderer.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import type { RuntimeKind } from '../../runtime/types.ts';
12
import type { NextStep } from '../../types/common.ts';
23
import type { StructuredToolOutput } from '../../rendering/types.ts';
34
import type { FilePathRenderStyle } from '../runtime-config-types.ts';
@@ -86,7 +87,7 @@ export interface CliTextTranscriptInput {
8687
items?: readonly AnyFragment[];
8788
structuredOutput?: StructuredToolOutput;
8889
nextSteps?: readonly NextStep[];
89-
nextStepsRuntime?: 'cli' | 'daemon' | 'mcp';
90+
nextStepsRuntime?: RuntimeKind;
9091
suppressWarnings?: boolean;
9192
showTestTiming?: boolean;
9293
filePathRenderStyle?: FilePathRenderStyle;
@@ -127,7 +128,7 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen
127128
let sawIncomingSummaryEvent = false;
128129
let sawIncomingNonSummaryEvent = false;
129130
let nextSteps: readonly NextStep[] = [];
130-
let nextStepsRuntime: 'cli' | 'daemon' | 'mcp' | undefined;
131+
let nextStepsRuntime: RuntimeKind | undefined;
131132
let sawProgressNextSteps = false;
132133
let lastRenderedTestProgressKey: string | null = null;
133134
let pendingStreamedSummary: SummaryTextBlock | null = null;
@@ -424,7 +425,7 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen
424425
structuredOutput = output;
425426
},
426427

427-
setNextSteps(steps: readonly NextStep[], runtime: 'cli' | 'daemon' | 'mcp'): void {
428+
setNextSteps(steps: readonly NextStep[], runtime: RuntimeKind): void {
428429
nextSteps = [...steps];
429430
nextStepsRuntime = runtime;
430431
},

src/utils/responses/__tests__/next-steps-renderer.test.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,20 @@ describe('next-steps-renderer', () => {
185185
expect(result).toBe('Open the Simulator app: open_sim()');
186186
});
187187

188+
it('trims label whitespace before rendering command steps', () => {
189+
const step: NextStep = {
190+
tool: 'boot_sim',
191+
cliTool: 'boot',
192+
workflow: 'simulator',
193+
label: ' Boot simulator ',
194+
params: { simulatorId: 'SIM-1' },
195+
};
196+
197+
expect(renderNextStep(step, 'cli')).toBe(
198+
'Boot simulator: xcodebuildmcp simulator boot --simulator-id SIM-1',
199+
);
200+
});
201+
188202
it('should render label-only step as plain text', () => {
189203
const step: NextStep = {
190204
label: 'Verify layout visually before continuing',

src/utils/responses/next-step-formatting.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ export interface FormatNextStepOptions {
1010
const SHELL_SAFE_UNQUOTED_ARG = /^[A-Za-z0-9_@%+=:,./~-]+$/;
1111

1212
function resolveLabel(step: NextStep): string {
13-
if (step.label?.trim()) return step.label;
13+
const label = step.label?.trim();
14+
if (label) return label;
1415
if (step.tool) return step.tool;
1516
if (step.cliTool) return step.cliTool;
1617
return 'Next action';
@@ -76,11 +77,12 @@ export function formatNextStep(step: NextStep, options: FormatNextStepOptions):
7677
const formatted =
7778
options.runtime === 'cli' ? formatNextStepForCli(step) : formatNextStepForMcp(step);
7879

79-
if (!step.label || formatted === step.label) {
80+
const label = resolveLabel(step);
81+
if (!step.label?.trim() || formatted === label) {
8082
return formatted;
8183
}
8284

83-
return `${step.label}: ${formatted}`;
85+
return `${label}: ${formatted}`;
8486
}
8587

8688
export function serializeNextSteps(

0 commit comments

Comments
 (0)