From 241a529efb9db4f9e58e9bc9989bc65e8b59b1af Mon Sep 17 00:00:00 2001 From: chosen1hyj <770104718@qq.com> Date: Wed, 3 Jun 2026 14:32:13 +0800 Subject: [PATCH] fix(server): don't treat a recycled PID as a running daemon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit acquirePidLock and isLocked only checked whether the lock file's PID was alive via process.kill(pid, 0). After an unclean daemon shutdown the stale paseo.pid is left behind; once the OS recycles that PID to an unrelated process the liveness check still passes, so the daemon refuses to start ("Another Paseo daemon is already running") and never binds its port — every client (desktop and CLI) hangs on "connecting". Verify process identity by comparing the live process's start time (ps -o lstart) against the lock's startedAt. If they diverge beyond a 60s tolerance the PID was reused and the lock is stale, so we clear it and start. Falls back to the previous conservative behaviour when the start time can't be determined (e.g. Windows). 
--- packages/server/src/server/pid-lock.test.ts | 67 ++++++++++++++++++++- packages/server/src/server/pid-lock.ts | 58 ++++++++++++++++-- 2 files changed, 117 insertions(+), 8 deletions(-) diff --git a/packages/server/src/server/pid-lock.test.ts b/packages/server/src/server/pid-lock.test.ts index 527655e1b6..646ffcd1a1 100644 --- a/packages/server/src/server/pid-lock.test.ts +++ b/packages/server/src/server/pid-lock.test.ts @@ -1,10 +1,21 @@ -import { mkdtemp, rm } from "node:fs/promises"; -import { tmpdir } from "node:os"; +import { execFileSync } from "node:child_process"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { hostname, tmpdir } from "node:os"; import { join } from "node:path"; import { describe, expect, test } from "vitest"; import { acquirePidLock, getPidLockInfo, releasePidLock, updatePidLock } from "./pid-lock.js"; +// Independently derive the real OS start time of a live PID, so the staleness +// tests don't depend on the implementation's own start-time helper. +function realProcessStartIso(pid: number): string { + const out = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], { + env: { ...process.env, LC_ALL: "C" }, + encoding: "utf8", + }).trim(); + return new Date(out).toISOString(); +} + describe("pid-lock ownership", () => { test("writes and releases lock for explicit owner pid", async () => { const paseoHome = await mkdtemp(join(tmpdir(), "paseo-pid-lock-owner-")); @@ -50,3 +61,55 @@ describe("pid-lock ownership", () => { } }); }); + +describe("pid-lock staleness (PID reuse)", () => { + test("treats lock as stale when its PID was recycled by a different process", async () => { + const paseoHome = await mkdtemp(join(tmpdir(), "paseo-pid-lock-reuse-")); + + try { + // Reproduce the real incident: the lock points at a PID that IS alive + // (here, this test process — like a recycled `printtool`), but it is not + // the daemon. Its recorded startedAt is long before that process began. 
+ const staleLock = { + pid: process.pid, + startedAt: "2020-01-01T00:00:00.000Z", + hostname: hostname(), + uid: process.getuid?.() ?? 0, + listen: "127.0.0.1:6767", + }; + await writeFile(join(paseoHome, "paseo.pid"), JSON.stringify(staleLock)); + + const ownerPid = process.pid + 10_000; + await acquirePidLock(paseoHome, null, { ownerPid }); + + const lock = await getPidLockInfo(paseoHome); + expect(lock?.pid).toBe(ownerPid); + } finally { + await rm(paseoHome, { recursive: true, force: true }); + } + }); + + test("still rejects when the lock PID is the same live process", async () => { + const paseoHome = await mkdtemp(join(tmpdir(), "paseo-pid-lock-live-")); + + try { + // A genuinely-live daemon: its recorded startedAt matches the real OS + // start time of the PID. The guard must still reject a second acquirer. + const liveLock = { + pid: process.pid, + startedAt: realProcessStartIso(process.pid), + hostname: hostname(), + uid: process.getuid?.() ?? 0, + listen: "127.0.0.1:6767", + }; + await writeFile(join(paseoHome, "paseo.pid"), JSON.stringify(liveLock)); + + const ownerPid = process.pid + 10_000; + await expect(acquirePidLock(paseoHome, null, { ownerPid })).rejects.toThrow( + /already running/, + ); + } finally { + await rm(paseoHome, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/server/src/server/pid-lock.ts b/packages/server/src/server/pid-lock.ts index 96d663e891..2de9bf1236 100644 --- a/packages/server/src/server/pid-lock.ts +++ b/packages/server/src/server/pid-lock.ts @@ -1,9 +1,19 @@ +import { execFileSync } from "node:child_process"; import { open, readFile, unlink, mkdir } from "node:fs/promises"; import { existsSync } from "node:fs"; import { join } from "node:path"; import { hostname } from "node:os"; import { z } from "zod"; +// The OS reuses PIDs. A stale lock left by an unclean daemon shutdown can name a +// PID the OS has since handed to an unrelated process, so a bare "is this PID +// alive?" 
check is not enough to prove the daemon is still running. If the live +// process started materially later than the lock was written, the PID was +// recycled and the lock is stale. lstart is second-granularity and the lock is +// written a beat after the daemon starts, so a genuine daemon's two timestamps +// sit within a few seconds; a recycled PID differs by the daemon's whole lifetime. +const PID_REUSE_TOLERANCE_MS = 60_000; + export const pidLockInfoSchema = z.object({ pid: z.number(), startedAt: z.string(), @@ -43,6 +53,42 @@ function isPidRunning(pid: number): boolean { } } +// Wall-clock start time of a live process, or null if it can't be determined +// (process gone, or `ps` unavailable e.g. on Windows). `ps -o lstart` is the +// portable keyword present on both macOS (BSD) and Linux; LC_ALL=C forces an +// English, Date.parse-able timestamp regardless of the user's locale. +function getProcessStartTimeMs(pid: number): number | null { + try { + const output = execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], { + encoding: "utf8", + env: { ...process.env, LC_ALL: "C" }, + }).trim(); + if (!output) { + return null; + } + const parsed = Date.parse(output); + return Number.isNaN(parsed) ? null : parsed; + } catch { + return null; + } +} + +// Whether the lock's PID still belongs to the process that wrote the lock. A PID +// is treated as recycled only if its live process started well AFTER startedAt. +function isLockProcessAlive(lock: PidLockInfo): boolean { + if (!isPidRunning(lock.pid)) { + return false; + } + const liveStartMs = getProcessStartTimeMs(lock.pid); + const lockStartMs = Date.parse(lock.startedAt); + if (liveStartMs === null || Number.isNaN(lockStartMs)) { + // Can't compare start times — stay conservative and assume the daemon is + // still running rather than risk launching a second one. 
+ return true; + } + return liveStartMs - lockStartMs <= PID_REUSE_TOLERANCE_MS; +} + function getPidFilePath(paseoHome: string): string { return join(paseoHome, "paseo.pid"); } @@ -78,17 +124,17 @@ export async function acquirePidLock( // Check if existing lock is stale const lockOwnerPid = resolveOwnerPid(options?.ownerPid); if (existingLock) { - if (isPidRunning(existingLock.pid)) { - if (existingLock.pid === lockOwnerPid) { - return; - } + if (existingLock.pid === lockOwnerPid && isPidRunning(existingLock.pid)) { + return; + } + if (isLockProcessAlive(existingLock)) { throw new PidLockError( `Another Paseo daemon is already running (PID ${existingLock.pid}, started ${existingLock.startedAt})`, existingLock, ); } - // Stale lock - remove it + // Stale lock (process gone, or its PID was recycled by another process) - remove it await unlink(pidPath).catch(() => {}); } @@ -197,7 +243,7 @@ export async function isLocked( if (!info) { return { locked: false }; } - if (!isPidRunning(info.pid)) { + if (!isLockProcessAlive(info)) { return { locked: false, info }; } return { locked: true, info };