Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 65 additions & 2 deletions packages/server/src/server/pid-lock.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
import { mkdtemp, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { execFileSync } from "node:child_process";
import { mkdtemp, rm, writeFile } from "node:fs/promises";
import { hostname, tmpdir } from "node:os";
import { join } from "node:path";
import { describe, expect, test } from "vitest";

import { acquirePidLock, getPidLockInfo, releasePidLock, updatePidLock } from "./pid-lock.js";

// Test-only helper: asks `ps` directly for the start time of `pid` and returns
// it as an ISO-8601 string. It deliberately does not call into the module under
// test, so the staleness tests have a second, independent source of truth.
// LC_ALL=C is set so `ps` prints the timestamp in a form `Date` can parse.
// Throws if `ps` fails or prints something that is not a parseable date.
function realProcessStartIso(pid: number): string {
  const psArgs = ["-o", "lstart=", "-p", `${pid}`];
  const psEnv = { ...process.env, LC_ALL: "C" };
  const rawStart = execFileSync("ps", psArgs, { env: psEnv, encoding: "utf8" });
  const startedAt = new Date(rawStart.trim());
  return startedAt.toISOString();
}

describe("pid-lock ownership", () => {
test("writes and releases lock for explicit owner pid", async () => {
const paseoHome = await mkdtemp(join(tmpdir(), "paseo-pid-lock-owner-"));
Expand Down Expand Up @@ -50,3 +61,55 @@ describe("pid-lock ownership", () => {
}
});
});

describe("pid-lock staleness (PID reuse)", () => {
  // Creates a throwaway Paseo home with the given prefix, hands it to `body`,
  // and removes it again whether or not `body` throws.
  async function withTempHome(
    prefix: string,
    body: (home: string) => Promise<void>,
  ): Promise<void> {
    const home = await mkdtemp(join(tmpdir(), prefix));
    try {
      await body(home);
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  }

  // Writes a lock file that names THIS test process as the holder, with the
  // supplied `startedAt`. The test process stands in for "some live PID".
  async function seedLockForThisProcess(home: string, startedAt: string): Promise<void> {
    const record = {
      pid: process.pid,
      startedAt,
      hostname: hostname(),
      uid: process.getuid?.() ?? 0,
      listen: "127.0.0.1:6767",
    };
    await writeFile(join(home, "paseo.pid"), JSON.stringify(record));
  }

  test("treats lock as stale when its PID was recycled by a different process", async () => {
    await withTempHome("paseo-pid-lock-reuse-", async (home) => {
      // Mirrors the real incident: the lock names a PID that is alive (this
      // test process, standing in for a recycled `printtool`), yet the recorded
      // startedAt is far older than that process, so it cannot be the daemon.
      await seedLockForThisProcess(home, "2020-01-01T00:00:00.000Z");

      const ownerPid = process.pid + 10_000;
      await acquirePidLock(home, null, { ownerPid });

      // The stale lock must have been replaced by one for the new owner.
      const current = await getPidLockInfo(home);
      expect(current?.pid).toBe(ownerPid);
    });
  });

  test("still rejects when the lock PID is the same live process", async () => {
    await withTempHome("paseo-pid-lock-live-", async (home) => {
      // A genuinely live holder: the recorded startedAt equals the PID's real
      // OS start time, so a second acquirer has to be turned away.
      const realStart = realProcessStartIso(process.pid);
      await seedLockForThisProcess(home, realStart);

      const ownerPid = process.pid + 10_000;
      const attempt = acquirePidLock(home, null, { ownerPid });
      await expect(attempt).rejects.toThrow(/already running/);
    });
  });
});
58 changes: 52 additions & 6 deletions packages/server/src/server/pid-lock.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
import { execFileSync } from "node:child_process";
import { open, readFile, unlink, mkdir } from "node:fs/promises";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { hostname } from "node:os";
import { z } from "zod";

// PIDs are recycled by the OS. After an unclean daemon shutdown the lock file
// can name a PID that now belongs to an unrelated process, so "is this PID
// alive?" alone does not prove the daemon is still running. The lock's recorded
// start time is therefore compared with the live process's start time: a live
// process that started materially later than the lock was written means the
// PID was recycled and the lock is stale.
//
// This is the largest gap (in milliseconds) still treated as "same process".
// `ps lstart` only has one-second resolution and the lock is written shortly
// after the daemon starts, so a genuine daemon's two timestamps are a few
// seconds apart, whereas a recycled PID is off by the daemon's whole lifetime.
const PID_REUSE_TOLERANCE_MS = 60 * 1_000;

export const pidLockInfoSchema = z.object({
pid: z.number(),
startedAt: z.string(),
Expand Down Expand Up @@ -43,6 +53,42 @@ function isPidRunning(pid: number): boolean {
}
}

// Wall-clock start time of a live process in milliseconds since the epoch, or
// null when it cannot be determined: the PID is not a positive integer, the
// process is gone, `ps` is unavailable (e.g. on Windows), `ps` did not answer
// in time, or its output could not be parsed as a date.
//
// `ps -o lstart` is the portable keyword present on both macOS (BSD) and Linux;
// LC_ALL=C forces an English, Date.parse-able timestamp regardless of the
// user's locale. Callers must treat null as "unknown", not as "not running".
function getProcessStartTimeMs(pid: number): number | null {
  // A PID that is not a positive integer can never name a single process, so
  // don't spawn `ps` for it at all.
  if (!Number.isInteger(pid) || pid <= 0) {
    return null;
  }
  try {
    const output = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], {
      encoding: "utf8",
      env: { ...process.env, LC_ALL: "C" },
      // Only stdout is needed. Without an explicit `stdio`, the child's stderr
      // is forwarded to this process's stderr, so `ps` diagnostics would leak
      // into the daemon's own output.
      stdio: ["ignore", "pipe", "ignore"],
      // execFileSync blocks the event loop; bound the wait so a wedged `ps`
      // cannot hang the caller. A timeout throws and is reported as null below.
      timeout: 5_000,
    }).trim();
    if (!output) {
      return null;
    }
    const parsed = Date.parse(output);
    return Number.isNaN(parsed) ? null : parsed;
  } catch {
    // Non-zero exit (no such process), missing binary, or timeout.
    return null;
  }
}

// Whether the lock's PID still belongs to the process the lock was written
// for, as opposed to an unrelated process that inherited the PID after reuse.
//
// Returns false when the PID is not running, or when the live process started
// more than PID_REUSE_TOLERANCE_MS after the lock's recorded `startedAt`.
// Returns true otherwise, including when the two start times cannot be compared.
function isLockProcessAlive(lock: PidLockInfo): boolean {
  if (!isPidRunning(lock.pid)) {
    return false;
  }
  const liveStartMs = getProcessStartTimeMs(lock.pid);
  const lockStartMs = Date.parse(lock.startedAt);
  if (liveStartMs === null || Number.isNaN(lockStartMs)) {
    // Can't compare start times — stay conservative and assume the daemon is
    // still running rather than risk launching a second one.
    return true;
  }
  // The comparison is deliberately one-sided. PID reuse can only hand the PID
  // to a process that started AFTER the lock was written. A live process that
  // started before the lock's timestamp was already holding this PID at that
  // moment, so it is the process the lock refers to, however much older it is
  // (e.g. a long-running owner passed in explicitly as `ownerPid`). Using an
  // absolute difference would wrongly declare such a lock stale.
  // NOTE(review): assumes `startedAt` is never later than the moment the lock
  // file was written — confirm against the code that writes the lock.
  return liveStartMs - lockStartMs <= PID_REUSE_TOLERANCE_MS;
}

// Location of the PID lock file inside the given Paseo home directory.
function getPidFilePath(paseoHome: string): string {
  const pidFileName = "paseo.pid";
  return join(paseoHome, pidFileName);
}
Expand Down Expand Up @@ -78,17 +124,17 @@ export async function acquirePidLock(
// Check if existing lock is stale
const lockOwnerPid = resolveOwnerPid(options?.ownerPid);
if (existingLock) {
if (isPidRunning(existingLock.pid)) {
if (existingLock.pid === lockOwnerPid) {
return;
}
if (existingLock.pid === lockOwnerPid && isPidRunning(existingLock.pid)) {
return;
}

if (isLockProcessAlive(existingLock)) {
throw new PidLockError(
`Another Paseo daemon is already running (PID ${existingLock.pid}, started ${existingLock.startedAt})`,
existingLock,
);
}
// Stale lock - remove it
// Stale lock (process gone, or its PID was recycled by another process) - remove it
await unlink(pidPath).catch(() => {});
}

Expand Down Expand Up @@ -197,7 +243,7 @@ export async function isLocked(
if (!info) {
return { locked: false };
}
if (!isPidRunning(info.pid)) {
if (!isLockProcessAlive(info)) {
return { locked: false, info };
}
return { locked: true, info };
Expand Down