From d8a81f9c3cc36d8ffe66900ce96334378ffd555b Mon Sep 17 00:00:00 2001 From: bittoby <218712309+bittoby@users.noreply.github.com> Date: Thu, 21 May 2026 19:14:21 +0200 Subject: [PATCH] Fix data-root path boundary checks --- lib/extractions.ts | 3 +- lib/path-security.ts | 13 ++++++++ lib/pipeline/audit.ts | 7 +++-- lib/pipeline/schema-gen.ts | 3 +- lib/sources.ts | 3 +- tests/unit/path-security.test.ts | 23 ++++++++++++++ tests/unit/sources.test.ts | 53 +++++++++++++++++++++++++++++++- 7 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 lib/path-security.ts create mode 100644 tests/unit/path-security.test.ts diff --git a/lib/extractions.ts b/lib/extractions.ts index 2e5162b..7436e2f 100644 --- a/lib/extractions.ts +++ b/lib/extractions.ts @@ -82,6 +82,7 @@ export type StorageSummary = { export type ExtractionSummary = ServerSummary | StorageSummary; import { REPO_ROOT, KNOWN_CATEGORIES } from "./repo-walk"; +import { isPathInsideRoot } from "./path-security"; function unwrap(field: any): T | null { if (field == null) return null; @@ -291,7 +292,7 @@ export function resolveProductSourcePath(slug: string, manifestPath: string): st } for (const fp of candidates) { - if (!fp.startsWith(REPO_ROOT)) continue; + if (!isPathInsideRoot(REPO_ROOT, fp)) continue; if (fs.existsSync(fp) && fs.statSync(fp).isFile()) return fp; } return null; diff --git a/lib/path-security.ts b/lib/path-security.ts new file mode 100644 index 0000000..4411549 --- /dev/null +++ b/lib/path-security.ts @@ -0,0 +1,13 @@ +import path from "node:path"; + +export function isPathInsideRoot(root: string, candidate: string): boolean { + const resolvedRoot = path.resolve(root); + const resolvedCandidate = path.resolve(candidate); + const relative = path.relative(resolvedRoot, resolvedCandidate); + return ( + relative === "" || + (relative !== ".." && + !relative.startsWith(`..${path.sep}`) && + !path.isAbsolute(relative)) + ); +} diff --git a/lib/pipeline/audit.ts b/lib/pipeline/audit.ts index 2e9c9cd..5af3ad4 100644 --- a/lib/pipeline/audit.ts +++ b/lib/pipeline/audit.ts @@ -25,6 +25,7 @@ import path from "node:path"; import crypto from "node:crypto"; import { env } from "@/lib/env"; import { parseMarkdown } from "@/lib/safe-matter"; +import { isPathInsideRoot } from "@/lib/path-security"; const DATA_DIR = path.resolve(env.PRODUCT_MCP_DATA_DIR); @@ -419,7 +420,7 @@ export function auditCrossLinks(walk: PortfolioWalk): SubAuditReport { if (!target || typeof target !== "string") return null; const fromDir = path.dirname(fromMd); const abs = path.resolve(fromDir, target); - if (!abs.startsWith(DATA_DIR)) return null; + if (!isPathInsideRoot(DATA_DIR, abs)) return null; if (!fs.existsSync(abs)) return null; return abs; } @@ -626,7 +627,7 @@ export function auditOrphans(walk: PortfolioWalk): SubAuditReport { const v = row?.[k]; if (typeof v !== "string" || !v) continue; const abs = path.resolve(fromDir, v); - if (abs.startsWith(DATA_DIR)) referencedAbs.add(abs); + if (isPathInsideRoot(DATA_DIR, abs)) referencedAbs.add(abs); } } } @@ -718,7 +719,7 @@ export function auditManifests(walk: PortfolioWalk): SubAuditReport { } if (typeof v !== "string") continue; const abs = path.resolve(fromDir, v); - if (!abs.startsWith(DATA_DIR) || !fs.existsSync(abs)) { + if (!isPathInsideRoot(DATA_DIR, abs) || !fs.existsSync(abs)) { results.push({ status: "fail", subject, diff --git a/lib/pipeline/schema-gen.ts b/lib/pipeline/schema-gen.ts index 2c69584..d6e13a5 100644 --- a/lib/pipeline/schema-gen.ts +++ b/lib/pipeline/schema-gen.ts @@ -21,6 +21,7 @@ import { messagesCreate, } from "@/lib/integrations/anthropic"; import { getStudioDb, ensureStudioSchema } from "@/lib/db/client"; +import { isPathInsideRoot } from "@/lib/path-security"; const DATA_DIR = env.PRODUCT_MCP_DATA_DIR; const SCHEMAS_DIR = path.join(DATA_DIR, "schemas"); @@ -41,7 +42,7 @@ function resolveReferencePath(p: string): { abs: string; rel: string } { // Accept absolute paths inside DATA_DIR or already-relative paths. let abs = path.isAbsolute(p) ? p : path.resolve(DATA_DIR, p); abs = path.resolve(abs); - if (!abs.startsWith(path.resolve(DATA_DIR))) { + if (!isPathInsideRoot(DATA_DIR, abs)) { throw new Error(`Reference path escapes PRODUCT_MCP_DATA_DIR: ${p}`); } const rel = path.relative(DATA_DIR, abs); diff --git a/lib/sources.ts b/lib/sources.ts index 4eda699..84a4001 100644 --- a/lib/sources.ts +++ b/lib/sources.ts @@ -3,6 +3,7 @@ import path from "node:path"; import yaml from "js-yaml"; import { REPO_ROOT, walkDirs, parseProductRelDir } from "./repo-walk"; import { parseMarkdown } from "./safe-matter"; +import { isPathInsideRoot } from "./path-security"; export type SourceScope = "product" | "line" | "category"; @@ -35,7 +36,7 @@ export function classifyScope(localPath: string): SourceScope { export function resolveManifestPath(mdDir: string, localPath: string): string | null { if (!localPath) return null; const abs = path.resolve(mdDir, localPath); - if (!abs.startsWith(REPO_ROOT)) return null; + if (!isPathInsideRoot(REPO_ROOT, abs)) return null; return fs.existsSync(abs) ? abs : null; } diff --git a/tests/unit/path-security.test.ts b/tests/unit/path-security.test.ts new file mode 100644 index 0000000..78044e8 --- /dev/null +++ b/tests/unit/path-security.test.ts @@ -0,0 +1,23 @@ +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { isPathInsideRoot } from "@/lib/path-security"; + +describe("isPathInsideRoot", () => { + it("accepts the root itself and real descendants", () => { + const root = path.resolve("/tmp/product-data"); + expect(isPathInsideRoot(root, root)).toBe(true); + expect(isPathInsideRoot(root, path.join(root, "server/dell/r770.pdf"))).toBe(true); + }); + + it("rejects parent and sibling paths", () => { + const root = path.resolve("/tmp/product-data"); + expect(isPathInsideRoot(root, path.dirname(root))).toBe(false); + expect(isPathInsideRoot(root, `${root}-escape/leak.pdf`)).toBe(false); + }); + + it("does not reject in-root names that merely start with dots", () => { + const root = path.resolve("/tmp/product-data"); + expect(isPathInsideRoot(root, path.join(root, "..not-parent/file.pdf"))).toBe(true); + }); +}); + diff --git a/tests/unit/sources.test.ts b/tests/unit/sources.test.ts index 98574d1..dab3f77 100644 --- a/tests/unit/sources.test.ts +++ b/tests/unit/sources.test.ts @@ -1,6 +1,22 @@ -import { describe, it, expect } from "vitest"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, it, expect, vi } from "vitest"; import { classifyScope, formatBytes } from "@/lib/sources"; +const originalDataDir = process.env.PRODUCT_MCP_DATA_DIR; +const tempDirs: string[] = []; + +afterEach(() => { + if (originalDataDir === undefined) delete process.env.PRODUCT_MCP_DATA_DIR; + else process.env.PRODUCT_MCP_DATA_DIR = originalDataDir; + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + fs.rmSync(`${dir}-escape`, { recursive: true, force: true }); + } + vi.resetModules(); +}); + describe("classifyScope (manifest local: → scope)", () => { it("classifies bare or source-prefixed paths as product scope", () => { expect(classifyScope("source/spec-sheet.pdf")).toBe("product"); @@ -45,3 +61,38 @@ describe("formatBytes", () => { expect(formatBytes(3.25 * 1024 ** 3)).toBe("3.25 GB"); }); }); + +describe("resolveManifestPath", () => { + it("rejects sibling directories with the same string prefix as the data root", async () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "pdex-root-")); + tempDirs.push(root); + const productDir = path.join(root, "server", "dell", "poweredge", "r770"); + const escapeDir = `${root}-escape`; + fs.mkdirSync(productDir, { recursive: true }); + fs.mkdirSync(escapeDir, { recursive: true }); + const leakPath = path.join(escapeDir, "leak.pdf"); + fs.writeFileSync(leakPath, "not really a pdf"); + + process.env.PRODUCT_MCP_DATA_DIR = root; + vi.resetModules(); + const { resolveManifestPath } = await import("@/lib/sources"); + + const manifestPath = path.relative(productDir, leakPath); + expect(resolveManifestPath(productDir, manifestPath)).toBeNull(); + }); + + it("still resolves files that are actually inside the data root", async () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "pdex-root-")); + tempDirs.push(root); + const productDir = path.join(root, "server", "dell", "poweredge", "r770"); + const sourcePath = path.join(productDir, "source", "spec-sheet.pdf"); + fs.mkdirSync(path.dirname(sourcePath), { recursive: true }); + fs.writeFileSync(sourcePath, "not really a pdf"); + + process.env.PRODUCT_MCP_DATA_DIR = root; + vi.resetModules(); + const { resolveManifestPath } = await import("@/lib/sources"); + + expect(resolveManifestPath(productDir, "source/spec-sheet.pdf")).toBe(sourcePath); + }); +});