Skip to content

Commit f7f0452

Browse files
cameroncookecodex
andcommitted
fix: Keep shutdown cleanup diagnostic-only
Record embedded housekeeping cleanup errors as shutdown diagnostics instead of failed shutdown steps. Keep diagnostic-only cleanup telemetry at info severity so best-effort cleanup does not look like an operational failure. Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 29af12d commit f7f0452

3 files changed

Lines changed: 61 additions & 29 deletions

File tree

src/server/__tests__/mcp-shutdown.test.ts

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ describe('runMcpShutdown', () => {
113113
expect(mocks.stopAllTrackedProcesses).toHaveBeenCalledTimes(1);
114114
});
115115

116-
it('marks workspace filesystem cleanup as failed when embedded errors are reported', async () => {
116+
it('records workspace filesystem cleanup diagnostics without failing the step', async () => {
117117
mocks.cleanupOwnedWorkspaceFilesystemArtifacts.mockResolvedValueOnce({
118118
workspaceKey: 'workspace-a',
119119
trigger: 'shutdown',
@@ -135,10 +135,12 @@ describe('runMcpShutdown', () => {
135135
const filesystemStep = result.steps.find(
136136
(step) => step.name === 'workspace-filesystem.cleanup-owned',
137137
);
138-
expect(filesystemStep?.status).toBe('failed');
138+
expect(filesystemStep?.status).toBe('completed');
139+
expect(filesystemStep?.diagnosticCount).toBe(1);
140+
expect(filesystemStep?.diagnostics).toEqual(['could not delete stale oslog file']);
139141
});
140142

141-
it('marks video capture cleanup as failed when embedded errors are reported', async () => {
143+
it('records video capture cleanup diagnostics without failing the step', async () => {
142144
mocks.stopAllVideoCaptureSessions.mockResolvedValueOnce({
143145
stoppedSessionCount: 0,
144146
errorCount: 1,
@@ -152,10 +154,12 @@ describe('runMcpShutdown', () => {
152154
});
153155

154156
const videoStep = result.steps.find((step) => step.name === 'video-capture.stop-all');
155-
expect(videoStep?.status).toBe('failed');
157+
expect(videoStep?.status).toBe('completed');
158+
expect(videoStep?.diagnosticCount).toBe(1);
159+
expect(videoStep?.diagnostics).toEqual(['failed to stop recorder']);
156160
});
157161

158-
it('marks video capture cleanup as failed when errorCount is zero but errors are reported', async () => {
162+
it('records video capture diagnostics when errorCount is zero but errors are reported', async () => {
159163
mocks.stopAllVideoCaptureSessions.mockResolvedValueOnce({
160164
stoppedSessionCount: 0,
161165
errorCount: 0,
@@ -169,10 +173,12 @@ describe('runMcpShutdown', () => {
169173
});
170174

171175
const videoStep = result.steps.find((step) => step.name === 'video-capture.stop-all');
172-
expect(videoStep?.status).toBe('failed');
176+
expect(videoStep?.status).toBe('completed');
177+
expect(videoStep?.diagnosticCount).toBe(1);
178+
expect(videoStep?.diagnostics).toEqual(['failed to stop recorder']);
173179
});
174180

175-
it('marks swift tracked process cleanup as failed when embedded errors are reported', async () => {
181+
it('records swift tracked process cleanup diagnostics without failing the step', async () => {
176182
mocks.stopAllTrackedProcesses.mockResolvedValueOnce({
177183
stoppedProcessCount: 0,
178184
errorCount: 1,
@@ -186,10 +192,12 @@ describe('runMcpShutdown', () => {
186192
});
187193

188194
const swiftStep = result.steps.find((step) => step.name === 'swift-processes.stop-all');
189-
expect(swiftStep?.status).toBe('failed');
195+
expect(swiftStep?.status).toBe('completed');
196+
expect(swiftStep?.diagnosticCount).toBe(1);
197+
expect(swiftStep?.diagnostics).toEqual(['failed to terminate swift process']);
190198
});
191199

192-
it('marks swift tracked process cleanup as failed when errorCount is zero but errors are reported', async () => {
200+
it('records swift tracked process diagnostics when errorCount is zero but errors are reported', async () => {
193201
mocks.stopAllTrackedProcesses.mockResolvedValueOnce({
194202
stoppedProcessCount: 0,
195203
errorCount: 0,
@@ -203,7 +211,9 @@ describe('runMcpShutdown', () => {
203211
});
204212

205213
const swiftStep = result.steps.find((step) => step.name === 'swift-processes.stop-all');
206-
expect(swiftStep?.status).toBe('failed');
214+
expect(swiftStep?.status).toBe('completed');
215+
expect(swiftStep?.diagnosticCount).toBe(1);
216+
expect(swiftStep?.diagnostics).toEqual(['failed to terminate swift process']);
207217
});
208218

209219
it('adds outer timeout headroom for one-item bulk cleanup', async () => {

src/server/mcp-shutdown.ts

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ export interface ShutdownStepResult {
3030
status: ShutdownStepStatus;
3131
durationMs: number;
3232
error?: string;
33+
diagnosticCount?: number;
34+
diagnostics?: string[];
3335
}
3436

3537
interface ShutdownStepOutcome<T> {
@@ -112,12 +114,22 @@ function buildExitCode(reason: McpShutdownReason): number {
112114
return FAILURE_REASONS.has(reason) ? 1 : 0;
113115
}
114116

115-
function throwIfErrors(name: string, errors: string[], errorCount?: number): void {
116-
const effectiveCount = Math.max(errorCount ?? 0, errors.length);
117-
if (effectiveCount > 0) {
118-
const detail = errors.length > 0 ? errors.join('; ') : 'no error details provided';
119-
throw new Error(`${name} reported ${effectiveCount} error(s): ${detail}`);
117+
function getCleanupDiagnostics(value: unknown): { count: number; messages: string[] } | null {
118+
if (value === null || typeof value !== 'object') {
119+
return null;
120120
}
121+
122+
const result = value as { errorCount?: unknown; errors?: unknown };
123+
const messages = Array.isArray(result.errors)
124+
? result.errors.filter((error): error is string => typeof error === 'string')
125+
: [];
126+
const explicitCount =
127+
typeof result.errorCount === 'number' && Number.isFinite(result.errorCount)
128+
? result.errorCount
129+
: 0;
130+
const count = Math.max(explicitCount, messages.length);
131+
132+
return count > 0 ? { count, messages } : null;
121133
}
122134

123135
function workspaceFilesystemCleanupTimeoutForOwnedSessions(ownedSessionCount: number): number {
@@ -162,6 +174,15 @@ export async function runMcpShutdown(input: {
162174
if (outcome.error) {
163175
step.error = outcome.error;
164176
}
177+
if (outcome.status === 'completed') {
178+
const diagnostics = getCleanupDiagnostics(outcome.value);
179+
if (diagnostics) {
180+
step.diagnosticCount = diagnostics.count;
181+
if (diagnostics.messages.length > 0) {
182+
step.diagnostics = diagnostics.messages;
183+
}
184+
}
185+
}
165186
steps.push(step);
166187
};
167188

@@ -210,29 +231,23 @@ export async function runMcpShutdown(input: {
210231
name: 'workspace-filesystem.cleanup-owned',
211232
timeoutMs: workspaceFilesystemCleanupTimeoutMs,
212233
operation: async (): Promise<unknown> => {
213-
const result = await cleanupOwnedWorkspaceFilesystemArtifacts({
234+
return cleanupOwnedWorkspaceFilesystemArtifacts({
214235
timeoutMs: STEP_TIMEOUT_MS,
215236
});
216-
throwIfErrors('workspace-filesystem.cleanup-owned', result.errors);
217-
return result;
218237
},
219238
},
220239
{
221240
name: 'video-capture.stop-all',
222241
timeoutMs: bulkStepTimeoutMs(input.snapshot.videoCaptureSessionCount),
223242
operation: async (): Promise<unknown> => {
224-
const result = await stopAllVideoCaptureSessions(STEP_TIMEOUT_MS);
225-
throwIfErrors('video-capture.stop-all', result.errors, result.errorCount);
226-
return result;
243+
return stopAllVideoCaptureSessions(STEP_TIMEOUT_MS);
227244
},
228245
},
229246
{
230247
name: 'swift-processes.stop-all',
231248
timeoutMs: bulkStepTimeoutMs(input.snapshot.swiftPackageProcessCount),
232249
operation: async (): Promise<unknown> => {
233-
const result = await stopAllTrackedProcesses(STEP_TIMEOUT_MS);
234-
throwIfErrors('swift-processes.stop-all', result.errors, result.errorCount);
235-
return result;
250+
return stopAllTrackedProcesses(STEP_TIMEOUT_MS);
236251
},
237252
},
238253
];
@@ -243,17 +258,22 @@ export async function runMcpShutdown(input: {
243258
}
244259

245260
const triggerError = input.error === undefined ? undefined : toErrorMessage(input.error);
246-
const cleanupFailureCount = steps.filter(
261+
const shutdownStepFailureCount = steps.filter(
247262
(step) => step.status === 'failed' || step.status === 'timed_out',
248263
).length;
264+
const cleanupDiagnosticCount = steps.reduce(
265+
(total, step) => total + (step.diagnosticCount ?? 0),
266+
0,
267+
);
249268

250269
captureMcpShutdownSummary({
251270
reason: input.reason,
252271
phase: input.snapshot.phase,
253272
exitCode,
254273
transportDisconnected,
255274
triggerError,
256-
cleanupFailureCount,
275+
shutdownStepFailureCount,
276+
cleanupDiagnosticCount,
257277
shutdownDurationMs: Date.now() - shutdownStartedAt,
258278
snapshot: input.snapshot as unknown as Record<string, unknown>,
259279
steps: steps as unknown as Array<Record<string, unknown>>,

src/utils/sentry.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,8 @@ export interface McpShutdownSummaryEvent {
376376
exitCode: number;
377377
transportDisconnected: boolean;
378378
triggerError?: string;
379-
cleanupFailureCount: number;
379+
shutdownStepFailureCount: number;
380+
cleanupDiagnosticCount?: number;
380381
shutdownDurationMs: number;
381382
snapshot: Record<string, unknown>;
382383
steps: Array<Record<string, unknown>>;
@@ -415,7 +416,7 @@ export function captureMcpShutdownSummary(summary: McpShutdownSummaryEvent): voi
415416
let level: 'error' | 'warning' | 'info';
416417
if (isCrashReason) {
417418
level = 'error';
418-
} else if (summary.cleanupFailureCount > 0 || localAnomalyCount > 0) {
419+
} else if (summary.shutdownStepFailureCount > 0 || localAnomalyCount > 0) {
419420
level = 'warning';
420421
} else {
421422
level = 'info';
@@ -433,7 +434,8 @@ export function captureMcpShutdownSummary(summary: McpShutdownSummaryEvent): voi
433434
exitCode: summary.exitCode,
434435
transportDisconnected: summary.transportDisconnected,
435436
triggerError: summary.triggerError,
436-
cleanupFailureCount: summary.cleanupFailureCount,
437+
shutdownStepFailureCount: summary.shutdownStepFailureCount,
438+
cleanupDiagnosticCount: summary.cleanupDiagnosticCount,
437439
shutdownDurationMs: summary.shutdownDurationMs,
438440
snapshot: summary.snapshot,
439441
steps: summary.steps,

0 commit comments

Comments
 (0)