Skip to content

Commit 4948dad

Browse files
cameroncookecodex
andcommitted
fix: Harden workspace cleanup lifecycle
Treat inaccessible PIDs as live, require daemon instance identity before live-owner cleanup, and make force-stop verify the current registry entry before signaling or unregistering daemon files. Move MCP startup cleanup off the stdio critical path and report embedded shutdown cleanup errors as failures. Normalize unstable Swift Testing snapshot progress while preserving stable failure summaries. Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 9b10d3b commit 4948dad

29 files changed

Lines changed: 871 additions & 430 deletions
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2+
import type { DaemonRegistryEntry } from '../../daemon/daemon-registry.ts';
3+
4+
const originalEntry: DaemonRegistryEntry = {
5+
workspaceKey: 'workspace-a',
6+
workspaceRoot: '/workspaces/workspace-a',
7+
socketPath: '/tmp/xcodebuildmcp-daemon.sock',
8+
pid: 123_456,
9+
startedAt: '2026-05-05T00:00:00.000Z',
10+
enabledWorkflows: ['build'],
11+
version: '1.0.0',
12+
instanceId: 'daemon-instance-a',
13+
};
14+
15+
const changedEntry: DaemonRegistryEntry = {
16+
...originalEntry,
17+
instanceId: 'daemon-instance-b',
18+
};
19+
20+
const registryMocks = vi.hoisted(() => ({
21+
cleanupWorkspaceDaemonFiles: vi.fn(),
22+
findDaemonRegistryEntryBySocketPath: vi.fn(),
23+
isPidAlive: vi.fn(),
24+
readDaemonRegistryEntry: vi.fn(),
25+
release: vi.fn(),
26+
}));
27+
28+
vi.mock('../../daemon/daemon-registry.ts', () => ({
29+
acquireDaemonRegistryMutationLock: vi.fn(() => ({
30+
workspaceKey: 'workspace-a',
31+
release: registryMocks.release,
32+
})),
33+
cleanupWorkspaceDaemonFiles: registryMocks.cleanupWorkspaceDaemonFiles,
34+
findDaemonRegistryEntryBySocketPath: registryMocks.findDaemonRegistryEntryBySocketPath,
35+
readDaemonRegistryEntry: registryMocks.readDaemonRegistryEntry,
36+
}));
37+
38+
vi.mock('../../utils/process-liveness.ts', () => ({
39+
isPidAlive: registryMocks.isPidAlive,
40+
}));
41+
42+
import { forceStopDaemon } from '../daemon-control.ts';
43+
44+
describe('daemon control force-stop registry races', () => {
45+
beforeEach(() => {
46+
registryMocks.cleanupWorkspaceDaemonFiles.mockReset();
47+
registryMocks.findDaemonRegistryEntryBySocketPath.mockReset();
48+
registryMocks.isPidAlive.mockReset();
49+
registryMocks.readDaemonRegistryEntry.mockReset();
50+
registryMocks.release.mockReset();
51+
});
52+
53+
afterEach(() => {
54+
vi.restoreAllMocks();
55+
});
56+
57+
it('sends the initial signal before releasing the registry mutation lock', async () => {
58+
registryMocks.findDaemonRegistryEntryBySocketPath.mockReturnValue(originalEntry);
59+
registryMocks.readDaemonRegistryEntry.mockReturnValue(originalEntry);
60+
registryMocks.isPidAlive.mockReturnValue(false);
61+
const kill = vi.spyOn(process, 'kill').mockImplementation(() => {
62+
expect(registryMocks.release).not.toHaveBeenCalled();
63+
return true;
64+
});
65+
66+
await forceStopDaemon(originalEntry.socketPath);
67+
68+
expect(kill).toHaveBeenCalledWith(originalEntry.pid, 'SIGTERM');
69+
expect(registryMocks.release).toHaveBeenCalledOnce();
70+
expect(registryMocks.cleanupWorkspaceDaemonFiles).toHaveBeenCalledWith(
71+
originalEntry.workspaceKey,
72+
{
73+
pid: originalEntry.pid,
74+
socketPath: originalEntry.socketPath,
75+
instanceId: originalEntry.instanceId,
76+
},
77+
);
78+
});
79+
80+
it('does not signal when daemon metadata changes before the initial signal', async () => {
81+
registryMocks.findDaemonRegistryEntryBySocketPath.mockReturnValue(originalEntry);
82+
registryMocks.readDaemonRegistryEntry.mockReturnValue(changedEntry);
83+
const kill = vi.spyOn(process, 'kill').mockImplementation(() => true);
84+
85+
await expect(forceStopDaemon(originalEntry.socketPath)).rejects.toThrow(
86+
'daemon registry metadata changed',
87+
);
88+
89+
expect(kill).not.toHaveBeenCalled();
90+
expect(registryMocks.cleanupWorkspaceDaemonFiles).not.toHaveBeenCalled();
91+
expect(registryMocks.release).toHaveBeenCalledOnce();
92+
});
93+
});
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2+
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
3+
import { tmpdir } from 'node:os';
4+
import * as path from 'node:path';
5+
import { forceStopDaemon } from '../daemon-control.ts';
6+
import {
7+
readDaemonRegistryEntry,
8+
type DaemonRegistryEntry,
9+
writeDaemonRegistryEntry,
10+
} from '../../daemon/daemon-registry.ts';
11+
import {
12+
daemonDirForWorkspaceKey,
13+
setDaemonRunDirOverrideForTests,
14+
} from '../../daemon/socket-path.ts';
15+
import { setXcodeBuildMCPAppDirOverrideForTests } from '../../utils/log-paths.ts';
16+
17+
const daemonPid = 123_456;
18+
19+
function createMissingPidError(): NodeJS.ErrnoException {
20+
const error = new Error('no such process') as NodeJS.ErrnoException;
21+
error.code = 'ESRCH';
22+
return error;
23+
}
24+
25+
function createEntry(overrides: Partial<DaemonRegistryEntry> = {}): DaemonRegistryEntry {
26+
const workspaceKey = overrides.workspaceKey ?? 'workspace-a';
27+
return {
28+
workspaceKey,
29+
workspaceRoot: `/workspaces/${workspaceKey}`,
30+
socketPath: path.join(daemonDirForWorkspaceKey(workspaceKey), 'd.sock'),
31+
pid: daemonPid,
32+
startedAt: '2026-05-05T00:00:00.000Z',
33+
enabledWorkflows: ['build'],
34+
version: '1.0.0',
35+
instanceId: 'daemon-instance-a',
36+
...overrides,
37+
};
38+
}
39+
40+
describe('daemon control', () => {
41+
let appDir: string;
42+
let daemonRunDir: string;
43+
44+
beforeEach(() => {
45+
appDir = mkdtempSync(path.join(tmpdir(), 'xcodebuildmcp-daemon-control-app-'));
46+
daemonRunDir = mkdtempSync(path.join(tmpdir(), 'xcodebuildmcp-daemon-control-run-'));
47+
setXcodeBuildMCPAppDirOverrideForTests(appDir);
48+
setDaemonRunDirOverrideForTests(daemonRunDir);
49+
});
50+
51+
afterEach(() => {
52+
vi.useRealTimers();
53+
vi.restoreAllMocks();
54+
setXcodeBuildMCPAppDirOverrideForTests(null);
55+
setDaemonRunDirOverrideForTests(null);
56+
rmSync(appDir, { recursive: true, force: true });
57+
rmSync(daemonRunDir, { recursive: true, force: true });
58+
});
59+
60+
it('does not unlink sockets without registry metadata', async () => {
61+
const socketPath = path.join(daemonRunDir, 'missing-registry.sock');
62+
mkdirSync(path.dirname(socketPath), { recursive: true, mode: 0o700 });
63+
writeFileSync(socketPath, 'socket placeholder');
64+
65+
await expect(forceStopDaemon(socketPath)).rejects.toThrow('registry metadata is missing');
66+
67+
expect(existsSync(socketPath)).toBe(true);
68+
});
69+
70+
it('unregisters daemon files only after SIGTERM stops the process', async () => {
71+
const entry = createEntry();
72+
writeDaemonRegistryEntry(entry);
73+
mkdirSync(path.dirname(entry.socketPath), { recursive: true, mode: 0o700 });
74+
writeFileSync(entry.socketPath, 'socket placeholder');
75+
let alive = true;
76+
const kill = vi.spyOn(process, 'kill').mockImplementation(((
77+
_pid: number,
78+
signal?: string | number,
79+
) => {
80+
if (signal === 0) {
81+
if (alive) {
82+
return true;
83+
}
84+
throw createMissingPidError();
85+
}
86+
if (signal === 'SIGTERM') {
87+
alive = false;
88+
}
89+
return true;
90+
}) as typeof process.kill);
91+
92+
await forceStopDaemon(entry.socketPath);
93+
94+
expect(kill).toHaveBeenCalledWith(entry.pid, 'SIGTERM');
95+
expect(readDaemonRegistryEntry(entry.workspaceKey)).toBeNull();
96+
expect(existsSync(entry.socketPath)).toBe(false);
97+
});
98+
99+
it('uses SIGKILL when the process stays alive after SIGTERM', async () => {
100+
vi.useFakeTimers();
101+
const entry = createEntry();
102+
writeDaemonRegistryEntry(entry);
103+
mkdirSync(path.dirname(entry.socketPath), { recursive: true, mode: 0o700 });
104+
writeFileSync(entry.socketPath, 'socket placeholder');
105+
let alive = true;
106+
const kill = vi.spyOn(process, 'kill').mockImplementation(((
107+
_pid: number,
108+
signal?: string | number,
109+
) => {
110+
if (signal === 0) {
111+
if (alive) {
112+
return true;
113+
}
114+
throw createMissingPidError();
115+
}
116+
if (signal === 'SIGKILL') {
117+
alive = false;
118+
}
119+
return true;
120+
}) as typeof process.kill);
121+
122+
const stopped = forceStopDaemon(entry.socketPath);
123+
await vi.advanceTimersByTimeAsync(1500);
124+
await stopped;
125+
126+
expect(kill).toHaveBeenCalledWith(entry.pid, 'SIGTERM');
127+
expect(kill).toHaveBeenCalledWith(entry.pid, 'SIGKILL');
128+
expect(readDaemonRegistryEntry(entry.workspaceKey)).toBeNull();
129+
expect(existsSync(entry.socketPath)).toBe(false);
130+
});
131+
132+
it('preserves registry metadata and socket when the process remains alive', async () => {
133+
vi.useFakeTimers();
134+
const entry = createEntry();
135+
writeDaemonRegistryEntry(entry);
136+
mkdirSync(path.dirname(entry.socketPath), { recursive: true, mode: 0o700 });
137+
writeFileSync(entry.socketPath, 'socket placeholder');
138+
vi.spyOn(process, 'kill').mockImplementation((() => true) as typeof process.kill);
139+
140+
const stopped = forceStopDaemon(entry.socketPath);
141+
const stoppedExpectation = expect(stopped).rejects.toThrow(
142+
`Daemon PID ${entry.pid} did not exit after SIGKILL`,
143+
);
144+
await vi.advanceTimersByTimeAsync(3000);
145+
146+
await stoppedExpectation;
147+
expect(readDaemonRegistryEntry(entry.workspaceKey)).toEqual(entry);
148+
expect(existsSync(entry.socketPath)).toBe(true);
149+
});
150+
});

src/cli/daemon-control.ts

Lines changed: 77 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
import { spawn } from 'node:child_process';
22
import { fileURLToPath } from 'node:url';
33
import { dirname, resolve } from 'node:path';
4-
import { existsSync, unlinkSync } from 'node:fs';
4+
import { existsSync } from 'node:fs';
55
import { DaemonClient, DaemonVersionMismatchError } from './daemon-client.ts';
66
import {
7+
acquireDaemonRegistryMutationLock,
78
cleanupWorkspaceDaemonFiles,
89
findDaemonRegistryEntryBySocketPath,
10+
readDaemonRegistryEntry,
11+
type DaemonRegistryEntry,
912
} from '../daemon/daemon-registry.ts';
13+
import { isPidAlive } from '../utils/process-liveness.ts';
1014

1115
/**
1216
* Default timeout for daemon startup in milliseconds.
@@ -18,6 +22,50 @@ export const DEFAULT_DAEMON_STARTUP_TIMEOUT_MS = 5000;
1822
*/
1923
export const DEFAULT_POLL_INTERVAL_MS = 100;
2024

25+
const FORCE_STOP_SIGNAL_TIMEOUT_MS = 1500;
26+
const FORCE_STOP_POLL_INTERVAL_MS = 50;
27+
28+
async function waitForPidExit(pid: number, timeoutMs: number): Promise<boolean> {
29+
const deadline = Date.now() + timeoutMs;
30+
while (Date.now() < deadline) {
31+
if (!isPidAlive(pid)) {
32+
return true;
33+
}
34+
await new Promise((resolveDelay) => setTimeout(resolveDelay, FORCE_STOP_POLL_INTERVAL_MS));
35+
}
36+
return !isPidAlive(pid);
37+
}
38+
39+
function validateCurrentRegistryEntry(
40+
expectedEntry: DaemonRegistryEntry,
41+
currentEntry: DaemonRegistryEntry | null,
42+
socketPath: string,
43+
): void {
44+
const matchesExpectedEntry =
45+
currentEntry !== null &&
46+
currentEntry.workspaceKey === expectedEntry.workspaceKey &&
47+
currentEntry.socketPath === expectedEntry.socketPath &&
48+
currentEntry.pid === expectedEntry.pid &&
49+
currentEntry.instanceId === expectedEntry.instanceId;
50+
51+
if (!matchesExpectedEntry) {
52+
throw new Error(`Cannot force-stop daemon at ${socketPath}: daemon registry metadata changed`);
53+
}
54+
}
55+
56+
function signalDaemonPid(pid: number, signal: NodeJS.Signals): boolean {
57+
try {
58+
process.kill(pid, signal);
59+
return true;
60+
} catch (error) {
61+
if ((error as NodeJS.ErrnoException).code === 'ESRCH') {
62+
return false;
63+
}
64+
const message = error instanceof Error ? error.message : String(error);
65+
throw new Error(`Failed to send ${signal} to daemon PID ${pid}: ${message}`);
66+
}
67+
}
68+
2169
/**
2270
* Get the path to the daemon executable.
2371
*/
@@ -36,35 +84,40 @@ export function getDaemonExecutablePath(): string {
3684

3785
/**
3886
* Force-stop a daemon that cannot be stopped gracefully (e.g. protocol version mismatch).
39-
* Derives the workspace key from the socket path, reads the registry for the PID,
40-
* sends SIGTERM, and removes the stale socket.
87+
* Uses registry ownership metadata to stop the process before unregistering daemon files.
4188
*/
4289
export async function forceStopDaemon(socketPath: string): Promise<void> {
4390
const entry = findDaemonRegistryEntryBySocketPath(socketPath);
44-
if (entry?.pid) {
45-
try {
46-
process.kill(entry.pid, 'SIGTERM');
47-
} catch {
48-
// Process may already be gone.
49-
}
50-
// Brief wait for the process to exit.
51-
await new Promise((resolve) => setTimeout(resolve, 500));
91+
if (!entry) {
92+
throw new Error(
93+
`Cannot force-stop daemon at ${socketPath}: daemon registry metadata is missing`,
94+
);
5295
}
53-
if (entry) {
54-
cleanupWorkspaceDaemonFiles(entry.workspaceKey, {
55-
pid: entry.pid,
56-
socketPath,
57-
allowLiveOwner: true,
58-
});
59-
} else {
60-
// Registry entry missing; cannot derive workspace key from socket path alone.
61-
// Clean up the socket file directly.
62-
try {
63-
unlinkSync(socketPath);
64-
} catch {
65-
// Socket may already be gone.
96+
97+
const lock = acquireDaemonRegistryMutationLock(entry.workspaceKey);
98+
if (!lock) {
99+
throw new Error(`Unable to acquire daemon registry lock for ${entry.workspaceKey}`);
100+
}
101+
102+
let termSent: boolean;
103+
try {
104+
validateCurrentRegistryEntry(entry, readDaemonRegistryEntry(entry.workspaceKey), socketPath);
105+
termSent = signalDaemonPid(entry.pid, 'SIGTERM');
106+
} finally {
107+
lock.release();
108+
}
109+
if (termSent && !(await waitForPidExit(entry.pid, FORCE_STOP_SIGNAL_TIMEOUT_MS))) {
110+
const killSent = signalDaemonPid(entry.pid, 'SIGKILL');
111+
if (killSent && !(await waitForPidExit(entry.pid, FORCE_STOP_SIGNAL_TIMEOUT_MS))) {
112+
throw new Error(`Daemon PID ${entry.pid} did not exit after SIGKILL`);
66113
}
67114
}
115+
116+
cleanupWorkspaceDaemonFiles(entry.workspaceKey, {
117+
pid: entry.pid,
118+
socketPath,
119+
instanceId: entry.instanceId,
120+
});
68121
}
69122

70123
export interface StartDaemonBackgroundOptions {

0 commit comments

Comments
 (0)