Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/extractions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ export type StorageSummary = {
export type ExtractionSummary = ServerSummary | StorageSummary;

import { REPO_ROOT, KNOWN_CATEGORIES } from "./repo-walk";
import { isPathInsideRoot } from "./path-security";

function unwrap<T = any>(field: any): T | null {
if (field == null) return null;
Expand Down Expand Up @@ -291,7 +292,7 @@ export function resolveProductSourcePath(slug: string, manifestPath: string): st
}

for (const fp of candidates) {
if (!fp.startsWith(REPO_ROOT)) continue;
if (!isPathInsideRoot(REPO_ROOT, fp)) continue;
if (fs.existsSync(fp) && fs.statSync(fp).isFile()) return fp;
}
return null;
Expand Down
13 changes: 13 additions & 0 deletions lib/path-security.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import path from "node:path";

/**
 * Reports whether `candidate` is `root` itself or is located beneath it.
 *
 * Both arguments are passed through `path.resolve` first, so relative inputs
 * and `..` segments are normalized before the comparison. The decision is made
 * on the relative path from root to candidate rather than on a string prefix,
 * so a sibling such as `<root>-escape` is not mistaken for a descendant, while
 * an in-root entry whose name merely begins with dots (`..not-parent`) is
 * still accepted.
 *
 * @param root - Directory that acts as the containment boundary.
 * @param candidate - Path to test against the boundary.
 * @returns `true` when the candidate equals the root or is inside it.
 */
export function isPathInsideRoot(root: string, candidate: string): boolean {
  const fromRoot = path.relative(path.resolve(root), path.resolve(candidate));

  // An empty relative path means the candidate is the root itself.
  if (fromRoot === "") return true;

  // Exactly ".." or a leading "../" segment climbs out of the root.
  if (fromRoot === "..") return false;
  if (fromRoot.startsWith(".." + path.sep)) return false;

  // A relative path that is still absolute cannot be expressed under the root.
  return !path.isAbsolute(fromRoot);
}
7 changes: 4 additions & 3 deletions lib/pipeline/audit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import path from "node:path";
import crypto from "node:crypto";
import { env } from "@/lib/env";
import { parseMarkdown } from "@/lib/safe-matter";
import { isPathInsideRoot } from "@/lib/path-security";

const DATA_DIR = path.resolve(env.PRODUCT_MCP_DATA_DIR);

Expand Down Expand Up @@ -419,7 +420,7 @@ export function auditCrossLinks(walk: PortfolioWalk): SubAuditReport {
if (!target || typeof target !== "string") return null;
const fromDir = path.dirname(fromMd);
const abs = path.resolve(fromDir, target);
if (!abs.startsWith(DATA_DIR)) return null;
if (!isPathInsideRoot(DATA_DIR, abs)) return null;
if (!fs.existsSync(abs)) return null;
return abs;
}
Expand Down Expand Up @@ -626,7 +627,7 @@ export function auditOrphans(walk: PortfolioWalk): SubAuditReport {
const v = row?.[k];
if (typeof v !== "string" || !v) continue;
const abs = path.resolve(fromDir, v);
if (abs.startsWith(DATA_DIR)) referencedAbs.add(abs);
if (isPathInsideRoot(DATA_DIR, abs)) referencedAbs.add(abs);
}
}
}
Expand Down Expand Up @@ -718,7 +719,7 @@ export function auditManifests(walk: PortfolioWalk): SubAuditReport {
}
if (typeof v !== "string") continue;
const abs = path.resolve(fromDir, v);
if (!abs.startsWith(DATA_DIR) || !fs.existsSync(abs)) {
if (!isPathInsideRoot(DATA_DIR, abs) || !fs.existsSync(abs)) {
results.push({
status: "fail",
subject,
Expand Down
3 changes: 2 additions & 1 deletion lib/pipeline/schema-gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
messagesCreate,
} from "@/lib/integrations/anthropic";
import { getStudioDb, ensureStudioSchema } from "@/lib/db/client";
import { isPathInsideRoot } from "@/lib/path-security";

const DATA_DIR = env.PRODUCT_MCP_DATA_DIR;
const SCHEMAS_DIR = path.join(DATA_DIR, "schemas");
Expand All @@ -41,7 +42,7 @@ function resolveReferencePath(p: string): { abs: string; rel: string } {
// Accept absolute paths inside DATA_DIR or already-relative paths.
let abs = path.isAbsolute(p) ? p : path.resolve(DATA_DIR, p);
abs = path.resolve(abs);
if (!abs.startsWith(path.resolve(DATA_DIR))) {
if (!isPathInsideRoot(DATA_DIR, abs)) {
throw new Error(`Reference path escapes PRODUCT_MCP_DATA_DIR: ${p}`);
}
const rel = path.relative(DATA_DIR, abs);
Expand Down
3 changes: 2 additions & 1 deletion lib/sources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import path from "node:path";
import yaml from "js-yaml";
import { REPO_ROOT, walkDirs, parseProductRelDir } from "./repo-walk";
import { parseMarkdown } from "./safe-matter";
import { isPathInsideRoot } from "./path-security";

export type SourceScope = "product" | "line" | "category";

Expand Down Expand Up @@ -35,7 +36,7 @@ export function classifyScope(localPath: string): SourceScope {
/**
 * Resolves `localPath` against `mdDir` and returns the absolute path only when
 * it stays inside `REPO_ROOT` and exists on disk.
 *
 * @param mdDir - Directory that `localPath` is resolved relative to.
 * @param localPath - Path to resolve; an empty value yields `null`.
 * @returns The absolute path, or `null` when the input is empty, the resolved
 *   path falls outside `REPO_ROOT`, or nothing exists at that path.
 */
export function resolveManifestPath(mdDir: string, localPath: string): string | null {
  if (!localPath) return null;
  const abs = path.resolve(mdDir, localPath);
  // Containment is decided by isPathInsideRoot alone. A raw string-prefix test
  // against REPO_ROOT would accept sibling directories sharing the prefix and
  // could reject valid paths when REPO_ROOT is not in normalized form.
  if (!isPathInsideRoot(REPO_ROOT, abs)) return null;
  return fs.existsSync(abs) ? abs : null;
}

Expand Down
23 changes: 23 additions & 0 deletions tests/unit/path-security.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import path from "node:path";
import { describe, expect, it } from "vitest";
import { isPathInsideRoot } from "@/lib/path-security";

// Exercises isPathInsideRoot against the root, descendants, ancestors and
// look-alike names. Only path arithmetic is involved; no files are created.
describe("isPathInsideRoot", () => {
  // Absolute root shared by every case.
  const dataRoot = path.resolve("/tmp/product-data");
  const inside = (candidate: string): boolean => isPathInsideRoot(dataRoot, candidate);

  it("accepts the root itself and real descendants", () => {
    expect(inside(dataRoot)).toBe(true);
    expect(inside(path.join(dataRoot, "server/dell/r770.pdf"))).toBe(true);
  });

  it("rejects parent and sibling paths", () => {
    expect(inside(path.dirname(dataRoot))).toBe(false);
    // Shares the root's string prefix but is a different directory.
    expect(inside(`${dataRoot}-escape/leak.pdf`)).toBe(false);
  });

  it("does not reject in-root names that merely start with dots", () => {
    expect(inside(path.join(dataRoot, "..not-parent/file.pdf"))).toBe(true);
  });
});

53 changes: 52 additions & 1 deletion tests/unit/sources.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
import { describe, it, expect } from "vitest";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, it, expect, vi } from "vitest";
import { classifyScope, formatBytes } from "@/lib/sources";

// Value of PRODUCT_MCP_DATA_DIR captured when this test module loads, so each
// test's override can be undone.
const originalDataDir = process.env.PRODUCT_MCP_DATA_DIR;
// Temporary roots created by tests; emptied after every test.
const tempDirs: string[] = [];

afterEach(() => {
  // Restore the environment variable exactly as found (unset stays unset).
  if (originalDataDir !== undefined) {
    process.env.PRODUCT_MCP_DATA_DIR = originalDataDir;
  } else {
    delete process.env.PRODUCT_MCP_DATA_DIR;
  }

  // Drain the registry, deleting each root followed by its "-escape" sibling.
  const created = tempDirs.splice(0);
  created.forEach((dir) => {
    for (const target of [dir, `${dir}-escape`]) {
      fs.rmSync(target, { recursive: true, force: true });
    }
  });

  vi.resetModules();
});

describe("classifyScope (manifest local: → scope)", () => {
it("classifies bare or source-prefixed paths as product scope", () => {
expect(classifyScope("source/spec-sheet.pdf")).toBe("product");
Expand Down Expand Up @@ -45,3 +61,38 @@ describe("formatBytes", () => {
expect(formatBytes(3.25 * 1024 ** 3)).toBe("3.25 GB");
});
});

describe("resolveManifestPath", () => {
  // Creates a throwaway root under the OS temp dir and registers it in
  // tempDirs so the afterEach hook removes it.
  const makeRoot = (): string => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), "pdex-root-"));
    tempDirs.push(dir);
    return dir;
  };

  // Points PRODUCT_MCP_DATA_DIR at `root`, clears the module registry and
  // imports a fresh copy of the module under test.
  // NOTE(review): presumably the module derives its root from this variable
  // at import time — confirm in repo-walk.
  const loadResolver = async (root: string) => {
    process.env.PRODUCT_MCP_DATA_DIR = root;
    vi.resetModules();
    const mod = await import("@/lib/sources");
    return mod.resolveManifestPath;
  };

  const productDirFor = (root: string): string =>
    path.join(root, "server", "dell", "poweredge", "r770");

  it("rejects sibling directories with the same string prefix as the data root", async () => {
    const root = makeRoot();
    const productDir = productDirFor(root);
    // Sibling whose name begins with the root's full path string.
    const escapeDir = `${root}-escape`;
    for (const dir of [productDir, escapeDir]) {
      fs.mkdirSync(dir, { recursive: true });
    }
    const leakPath = path.join(escapeDir, "leak.pdf");
    fs.writeFileSync(leakPath, "not really a pdf");

    const resolveManifestPath = await loadResolver(root);

    // Relative reference from the product directory to the outside file.
    const manifestPath = path.relative(productDir, leakPath);
    expect(resolveManifestPath(productDir, manifestPath)).toBeNull();
  });

  it("still resolves files that are actually inside the data root", async () => {
    const root = makeRoot();
    const productDir = productDirFor(root);
    const sourcePath = path.join(productDir, "source", "spec-sheet.pdf");
    fs.mkdirSync(path.dirname(sourcePath), { recursive: true });
    fs.writeFileSync(sourcePath, "not really a pdf");

    const resolveManifestPath = await loadResolver(root);

    expect(resolveManifestPath(productDir, "source/spec-sheet.pdf")).toBe(sourcePath);
  });
});