diff --git a/app/_components/NavBar.tsx b/app/_components/NavBar.tsx index 43641af..08f33a1 100644 --- a/app/_components/NavBar.tsx +++ b/app/_components/NavBar.tsx @@ -13,6 +13,7 @@ const ITEMS: { href: string; label: string }[] = [ { href: "/pipeline/extract", label: "Extract" }, { href: "/pipeline/runs", label: "Runs" }, { href: "/pipeline/audit", label: "Audit" }, + { href: "/compare", label: "Compare" }, { href: "/usage", label: "Usage" } ]; diff --git a/app/api/compare/route.ts b/app/api/compare/route.ts new file mode 100644 index 0000000..b90c913 --- /dev/null +++ b/app/api/compare/route.ts @@ -0,0 +1,74 @@ +/** + * GET /api/compare?left=&right= + * + * Returns the pairwise diff of two `extraction.json` files. Both query + * params accept a product slug — the latest extraction for each is loaded + * from disk and run through diffExtractions(). + * + * Run-id resolution (compare two runs of the same product) is intentionally + * out of scope for this iteration; see the linked issue for the follow-up. + * + * Response shape: + * { left: ExtractionIdentity, right: ExtractionIdentity, diff: ExtractionDiff } + * + * Errors: + * 400 — missing query params + * 404 — either slug doesn't resolve to an extraction.json on disk + * 409 — both sides resolve but to incompatible categories + */ +import { NextRequest, NextResponse } from "next/server"; +import { getExtraction } from "@/lib/extractions"; +import { diffExtractions } from "@/lib/pipeline/extraction-diff"; + +export const dynamic = "force-dynamic"; +export const runtime = "nodejs"; + +export async function GET(req: NextRequest) { + const url = new URL(req.url); + const leftSlug = (url.searchParams.get("left") ?? "").trim(); + const rightSlug = (url.searchParams.get("right") ?? 
"").trim(); + + if (!leftSlug || !rightSlug) { + return NextResponse.json( + { error: "left and right query params are required" }, + { status: 400 } + ); + } + if (leftSlug === rightSlug) { + return NextResponse.json( + { error: "left and right must reference different slugs" }, + { status: 400 } + ); + } + + const left = getExtraction(leftSlug); + if (!left) { + return NextResponse.json( + { error: `no extraction.json found for slug "${leftSlug}"` }, + { status: 404 } + ); + } + const right = getExtraction(rightSlug); + if (!right) { + return NextResponse.json( + { error: `no extraction.json found for slug "${rightSlug}"` }, + { status: 404 } + ); + } + + const leftCategory = typeof left.category === "string" ? left.category : null; + const rightCategory = typeof right.category === "string" ? right.category : null; + if (leftCategory && rightCategory && leftCategory !== rightCategory) { + return NextResponse.json( + { + error: "cross-category comparison is not supported", + leftCategory, + rightCategory, + }, + { status: 409 } + ); + } + + const diff = diffExtractions(left, right); + return NextResponse.json(diff); +} diff --git a/app/compare/_components/ComparePicker.tsx b/app/compare/_components/ComparePicker.tsx new file mode 100644 index 0000000..b654d16 --- /dev/null +++ b/app/compare/_components/ComparePicker.tsx @@ -0,0 +1,122 @@ +"use client"; + +import { useRouter, useSearchParams } from "next/navigation"; +import { useCallback } from "react"; + +export interface PickerOption { + slug: string; + label: string; + category: string; +} + +interface Props { + options: PickerOption[]; + leftSlug: string | null; + rightSlug: string | null; +} + +export default function ComparePicker({ options, leftSlug, rightSlug }: Props) { + const router = useRouter(); + const searchParams = useSearchParams(); + + const setSide = useCallback( + (side: "left" | "right", slug: string) => { + const params = new URLSearchParams(searchParams?.toString() ?? 
""); + if (slug) { + params.set(side, slug); + } else { + params.delete(side); + } + const qs = params.toString(); + router.push(qs ? `/compare?${qs}` : "/compare"); + }, + [router, searchParams] + ); + + const swap = useCallback(() => { + if (!leftSlug || !rightSlug) return; + const params = new URLSearchParams(searchParams?.toString() ?? ""); + params.set("left", rightSlug); + params.set("right", leftSlug); + router.push(`/compare?${params.toString()}`); + }, [leftSlug, rightSlug, router, searchParams]); + + const optionsByCategory = groupByCategory(options); + + return ( +
+ setSide("left", slug)} + /> + + setSide("right", slug)} + /> +
+ ); +} + +interface SideSelectProps { + label: string; + value: string | null; + groups: Map; + excluded: string | null; + onChange: (slug: string) => void; +} + +function SideSelect({ label, value, groups, excluded, onChange }: SideSelectProps) { + return ( + + ); +} + +function groupByCategory(options: PickerOption[]): Map { + const out = new Map(); + for (const opt of options) { + const list = out.get(opt.category) ?? []; + list.push(opt); + out.set(opt.category, list); + } + for (const list of out.values()) { + list.sort((a, b) => a.label.localeCompare(b.label)); + } + return new Map(Array.from(out.entries()).sort((a, b) => a[0].localeCompare(b[0]))); +} diff --git a/app/compare/_components/DiffTable.tsx b/app/compare/_components/DiffTable.tsx new file mode 100644 index 0000000..1be12b1 --- /dev/null +++ b/app/compare/_components/DiffTable.tsx @@ -0,0 +1,300 @@ +"use client"; + +import { useState } from "react"; +import type { + DiffNode, + DiffStatus, + ExtractionDiff, + RowDiff, + ScalarDiff, + SetDiff, +} from "@/lib/pipeline/extraction-diff"; + +interface Props { + diff: ExtractionDiff; +} + +export default function DiffTable({ diff }: Props) { + const [collapseUnchanged, setCollapseUnchanged] = useState(true); + const visible = collapseUnchanged + ? diff.fields.filter((f) => f.status !== "unchanged") + : diff.fields; + + return ( +
+ +
+ + · + {visible.length} of {diff.fields.length} fields shown +
+ + + + + + + + + + + + {visible.length === 0 ? ( + + + + ) : ( + visible.map((node) => ) + )} + +
·field{diff.identityLeft.model || "left"}{diff.identityRight.model || "right"}
+ {collapseUnchanged ? "No differences — uncheck the box above to see all fields." : "No fields to compare."} +
+
+ ); +} + +function SummaryBar({ diff }: { diff: ExtractionDiff }) { + return ( +
+ + + + +
+ ); +} + +function Pill({ status, count }: { status: DiffStatus; count: number }) { + return ( + + {label(status)}: {count} + + ); +} + +function FieldRow({ node }: { node: DiffNode }) { + if (node.kind === "scalar") return ; + if (node.kind === "set") return ; + return ; +} + +function ScalarRow({ node }: { node: ScalarDiff }) { + const [open, setOpen] = useState(false); + const hasEvidence = !!(node.evidenceLeft || node.evidenceRight); + return ( + <> + + + + + +
{node.path}
+ {node.confidenceDelta !== null && Math.abs(node.confidenceDelta) > 0.01 ? ( +
+ Δconfidence: {fmtDelta(node.confidenceDelta)} +
+ ) : null} + + {renderValue(node.left)} + +
+ {renderValue(node.right)} + {hasEvidence ? ( + + ) : null} +
+ + + {open && hasEvidence ? ( + + + + + + ) : null} + + ); +} + +function SetRow({ node }: { node: SetDiff }) { + return ( + + + + + +
{node.path}
+
set
+ + {renderList(node.left)} + +
{renderList(node.right)}
+ {node.added.length > 0 ? ( +
+ {node.added.map(renderValue).join(", ")}
+ ) : null} + {node.removed.length > 0 ? ( +
− {node.removed.map(renderValue).join(", ")}
+ ) : null} + + + ); +} + +function RowGroup({ node }: { node: RowDiff }) { + const [open, setOpen] = useState(node.status !== "unchanged"); + return ( + <> + + + + + + + + + {node.fields.filter((f) => f.status === "removed").length} removed + + + {node.fields.filter((f) => f.status === "added").length} added ·{" "} + {node.fields.filter((f) => f.status === "changed").length} changed + + + {open + ? node.fields + .filter((c) => c.status !== "unchanged" || node.status === "unchanged") + .map((child) => ) + : null} + + ); +} + +function EvidencePair({ + left, + right, +}: { + left: { source?: string; page?: number; quote?: string; confidence?: number } | null; + right: { source?: string; page?: number; quote?: string; confidence?: number } | null; +}) { + return ( +
+ + +
+ ); +} + +function EvidenceCell({ + side, + e, +}: { + side: "left" | "right"; + e: { source?: string; page?: number; quote?: string; confidence?: number } | null; +}) { + if (!e) { + return ( +
no evidence ({side})
+ ); + } + return ( +
+
+ {e.source ?? "—"} + {typeof e.page === "number" ? ` · p${e.page}` : ""} + {typeof e.confidence === "number" ? ` · ${(e.confidence * 100).toFixed(0)}%` : ""} +
+ {e.quote ?
"{e.quote}"
: null} +
+ ); +} + +// ---------------------------------------------------------------------------- +// Render primitives +// ---------------------------------------------------------------------------- + +function renderValue(v: unknown): string { + if (v === null) return "—"; + if (v === undefined) return "—"; + if (typeof v === "string") return v.length > 0 ? v : `""`; + if (typeof v === "number" || typeof v === "boolean") return String(v); + if (Array.isArray(v)) return v.length === 0 ? "[]" : `[${v.length} items]`; + return JSON.stringify(v); +} + +function renderList(items: unknown[]): string { + if (items.length === 0) return "[]"; + return items.map(renderValue).join(", "); +} + +function StatusGlyph({ status }: { status: DiffStatus }) { + const map: Record = { + unchanged: "·", + changed: "≠", + added: "+", + removed: "−", + }; + return ( + + {map[status]} + + ); +} + +function rowCls(status: DiffStatus): string { + if (status === "unchanged") return "border-b border-white/5"; + return "border-b border-white/10 hover:bg-white/[0.02]"; +} + +function pillCls(status: DiffStatus): string { + switch (status) { + case "changed": return "bg-amber-500/10 text-amber-200 border border-amber-500/20"; + case "added": return "bg-emerald-500/10 text-emerald-200 border border-emerald-500/20"; + case "removed": return "bg-rose-500/10 text-rose-200 border border-rose-500/20"; + case "unchanged": return "bg-white/[0.04] text-white/60 border border-white/10"; + } +} + +function glyphCls(status: DiffStatus): string { + switch (status) { + case "changed": return "text-amber-300"; + case "added": return "text-emerald-300"; + case "removed": return "text-rose-300"; + case "unchanged": return "text-white/30"; + } +} + +function label(status: DiffStatus): string { + switch (status) { + case "changed": return "changed"; + case "added": return "added"; + case "removed": return "removed"; + case "unchanged": return "unchanged"; + } +} + +function fmtDelta(d: number): string { + const sign = 
d > 0 ? "+" : d < 0 ? "" : ""; + return `${sign}${d.toFixed(2)}`; +} diff --git a/app/compare/page.tsx b/app/compare/page.tsx index 488ef1c..3a2fed1 100644 --- a/app/compare/page.tsx +++ b/app/compare/page.tsx @@ -1,22 +1,117 @@ import Link from "next/link"; +import { listExtractions, getExtraction } from "@/lib/extractions"; +import { diffExtractions } from "@/lib/pipeline/extraction-diff"; +import ComparePicker, { type PickerOption } from "./_components/ComparePicker"; +import DiffTable from "./_components/DiffTable"; + +export const dynamic = "force-dynamic"; + +interface PageProps { + searchParams: { left?: string; right?: string }; +} + +export default function ComparePage({ searchParams }: PageProps) { + const all = listExtractions(); + const options: PickerOption[] = all.map((s) => ({ + slug: s.slug, + label: `${s.vendor || "—"} · ${s.model || s.slug}`, + category: s.category, + })); + + const leftSlug = (searchParams.left ?? "").trim() || null; + const rightSlug = (searchParams.right ?? "").trim() || null; -export default function ComparePage() { return ( -
-

Compare

+
+
+

Compare

+

+ Side-by-side diff of two structured product extractions. Pick a left and right product + below — the diff walks every field, preserves per-value evidence, and highlights where + confidence shifted between the two extractions. +

+
+ + {options.length < 2 ? ( + + ) : ( + <> + + + + )} +
+ ); +} + +function CompareBody({ + leftSlug, + rightSlug, +}: { + leftSlug: string | null; + rightSlug: string | null; +}) { + if (!leftSlug || !rightSlug) { + return (
+

Select a product for each side to see the diff.

+
+ ); + } + if (leftSlug === rightSlug) { + return ( +
+

Pick two different products. Comparing a slug against itself is always a no-op.

+
+ ); + } + + const left = getExtraction(leftSlug); + const right = getExtraction(rightSlug); + if (!left || !right) { + return ( +

- Comparison view is offline pending the new schema. The previous version was wired to the - old DB tables, which were removed in the structured-extraction pivot. + Couldn't load one of the extractions: + {!left ? ` "${leftSlug}"` : ""} + {!right ? ` "${rightSlug}"` : ""}.

-

- Once we have ≥2 extractions on disk, this page will be rebuilt to read{" "} - extraction.json directly. +

+ ← reset picker

-

- ← back to extractions +

+ ); + } + if ( + typeof left.category === "string" && + typeof right.category === "string" && + left.category !== right.category + ) { + return ( +
+

+ Cross-category comparison isn't supported. {leftSlug} is{" "} + {String(left.category)} and {rightSlug} is{" "} + {String(right.category)}.

+ ); + } + + const diff = diffExtractions(left, right); + return ; +} + +function EmptyState({ count }: { count: number }) { + return ( +
+

+ Compare needs ≥ 2 extractions on disk; currently {count} {count === 1 ? "exists" : "exist"}. +

+

+ Run the pipeline against a second product first — see{" "} + /pipeline/extract. +

); } diff --git a/lib/pipeline/extraction-diff.ts b/lib/pipeline/extraction-diff.ts new file mode 100644 index 0000000..c455d50 --- /dev/null +++ b/lib/pipeline/extraction-diff.ts @@ -0,0 +1,501 @@ +/** + * Pure pairwise diff for two `extraction.json` trees. + * + * The viewer's /compare page calls this to render side-by-side deltas with + * per-value evidence preserved. The function is intentionally I/O-free so + * it stays trivially testable. + * + * Shape rules (mirroring lib/extractions.ts): + * - Most leaf fields are `{ value, evidence }` blocks. Scalars at the top + * level (vendor, slug, model, category, product_line, description) are + * compared raw. + * - `extraction_metadata` is informational, not a value-bearing field — + * it's surfaced separately as a context block, never as diff rows. + * - Arrays of records (cpu_skus, pcie_slots, riser_configs, ...) are + * keyed by `slug ?? id ?? sku ?? name`, with positional fallback when + * none of those keys are present. + * - Arrays of scalars (workload_tags.value, processor_family.value, ...) + * are compared as sets for added/removed and as a single row for the + * overall list when nothing keyed matches. + */ +import type { Extraction, Evidence } from "@/lib/extractions"; + +export type DiffStatus = "unchanged" | "added" | "removed" | "changed"; + +export interface ScalarDiff { + kind: "scalar"; + path: string; + status: DiffStatus; + left: unknown; + right: unknown; + evidenceLeft: Evidence | null; + evidenceRight: Evidence | null; + confidenceDelta: number | null; +} + +export interface SetDiff { + kind: "set"; + path: string; + status: DiffStatus; + left: unknown[]; + right: unknown[]; + added: unknown[]; + removed: unknown[]; + evidenceLeft: Evidence | null; + evidenceRight: Evidence | null; +} + +export interface RowDiff { + kind: "row"; + path: string; + status: DiffStatus; + /** Stable key identifying the row across both sides. */ + rowKey: string; + /** Per-field deltas inside this row. 
Only populated when status='changed'. */ + fields: DiffNode[]; + /** Full record on each side, for context. */ + leftRecord: Record | null; + rightRecord: Record | null; +} + +export type DiffNode = ScalarDiff | SetDiff | RowDiff; + +export interface ExtractionDiff { + /** Top-level identity context — vendor/slug/category etc. */ + identityLeft: ExtractionIdentity; + identityRight: ExtractionIdentity; + /** Field-level diffs, ordered by first appearance in the left extraction. */ + fields: DiffNode[]; + /** Aggregate counters for the diff table header. */ + summary: DiffSummary; +} + +export interface ExtractionIdentity { + vendor: string; + category: string; + product_line: string; + slug: string; + model: string; + schema_version: string | null; + extracted_at: string | null; +} + +export interface DiffSummary { + unchanged: number; + changed: number; + added: number; + removed: number; +} + +// ---------------------------------------------------------------------------- +// Top-level entry point +// ---------------------------------------------------------------------------- + +const IDENTITY_KEYS = new Set([ + "vendor", + "category", + "subcategory", + "product_line", + "slug", + "model", + "description", + "overlays", + "extraction_metadata", + "sources", +]); + +export function diffExtractions(left: Extraction, right: Extraction): ExtractionDiff { + const fieldKeys = unionOrderedKeys(left, right, (k) => !IDENTITY_KEYS.has(k)); + const fields: DiffNode[] = []; + for (const key of fieldKeys) { + const node = diffField(key, (left as any)[key], (right as any)[key]); + if (node) fields.push(node); + } + return { + identityLeft: readIdentity(left), + identityRight: readIdentity(right), + fields, + summary: summarize(fields), + }; +} + +function readIdentity(d: Extraction): ExtractionIdentity { + return { + vendor: unwrap(d.vendor) ?? "", + category: unwrap(d.category) ?? "", + product_line: unwrap(d.product_line) ?? "", + slug: unwrap(d.slug) ?? 
"", + model: unwrap(d.model) ?? "", + schema_version: d.extraction_metadata?.schema_version ?? null, + extracted_at: d.extraction_metadata?.extracted_at ?? null, + }; +} + +// ---------------------------------------------------------------------------- +// Field dispatch +// ---------------------------------------------------------------------------- + +function diffField(path: string, a: unknown, b: unknown): DiffNode | null { + // Missing on either side — short-circuit. Genuinely-absent fields skip + // the set/array/scalar machinery entirely. + if (a === undefined && b === undefined) return null; + if (a === undefined) return makeRemoteOnly(path, b, "added"); + if (b === undefined) return makeRemoteOnly(path, a, "removed"); + + // Plain scalars (top-level identity-adjacent strings we let through). + if (!isValueWrapper(a) && !isValueWrapper(b) && !Array.isArray(a) && !Array.isArray(b)) { + if (typeof a !== "object" || a === null || typeof b !== "object" || b === null) { + return rawScalar(path, a, b); + } + } + + // Array-of-records vs array-of-scalars: peek at the first element of either side. + const aArr = arrayOfWrapperValue(a); + const bArr = arrayOfWrapperValue(b); + if (aArr !== null || bArr !== null) { + const aa = aArr ?? []; + const bb = bArr ?? []; + if (looksLikeRecordList(aa) || looksLikeRecordList(bb)) { + return diffRowList(path, aa, bb); + } + return diffScalarSet(path, aa, bb, evidenceOf(a), evidenceOf(b)); + } + + // {value, evidence} wrapper — the common case. + const va = isValueWrapper(a) ? (a as any).value : a; + const vb = isValueWrapper(b) ? (b as any).value : b; + const ea = evidenceOf(a); + const eb = evidenceOf(b); + return wrappedScalar(path, va, vb, ea, eb); +} + +function makeRemoteOnly(path: string, present: unknown, status: "added" | "removed"): DiffNode { + if (isValueWrapper(present)) { + return { + kind: "scalar", + path, + status, + left: status === "removed" ? (present as any).value : undefined, + right: status === "added" ? 
// ----------------------------------------------------------------------------
// Scalar comparison
// ----------------------------------------------------------------------------

/**
 * Diff two bare values that carry no evidence block.
 *
 * @param path - Field path reported on the resulting node.
 * @param a - Left-hand value.
 * @param b - Right-hand value.
 * @returns A scalar node: "unchanged" when the values are deeply equal,
 *   "changed" otherwise. Evidence and confidence delta are always null.
 */
function rawScalar(path: string, a: unknown, b: unknown): ScalarDiff {
  return {
    kind: "scalar",
    path,
    status: scalarsEqual(a, b) ? "unchanged" : "changed",
    left: a,
    right: b,
    evidenceLeft: null,
    evidenceRight: null,
    confidenceDelta: null,
  };
}

/**
 * Diff two values that were unwrapped from `{ value, evidence }` blocks,
 * keeping each side's evidence on the resulting node.
 *
 * @param path - Field path reported on the resulting node.
 * @param a - Left-hand unwrapped value.
 * @param b - Right-hand unwrapped value.
 * @param ea - Evidence attached to the left value, if any.
 * @param eb - Evidence attached to the right value, if any.
 * @returns A scalar node. `confidenceDelta` is `right - left`, rounded to four
 *   decimals, and is null unless both sides carry a numeric confidence.
 */
function wrappedScalar(
  path: string,
  a: unknown,
  b: unknown,
  ea: Evidence | null,
  eb: Evidence | null
): ScalarDiff {
  const leftConfidence = typeof ea?.confidence === "number" ? ea.confidence : null;
  const rightConfidence = typeof eb?.confidence === "number" ? eb.confidence : null;
  // toFixed + unary plus trims float noise (0.7 - 0.95 => -0.25, not -0.24999…).
  const confidenceDelta =
    leftConfidence !== null && rightConfidence !== null
      ? +(rightConfidence - leftConfidence).toFixed(4)
      : null;
  return {
    kind: "scalar",
    path,
    status: scalarsEqual(a, b) ? "unchanged" : "changed",
    left: a,
    right: b,
    evidenceLeft: ea,
    evidenceRight: eb,
    confidenceDelta,
  };
}

/**
 * Deep structural equality for JSON-like values.
 *
 * Primitives compare with `===`. Arrays must match element by element in
 * order. Plain objects must have exactly the same key set with equal values.
 * An array is never equal to a non-array object: without that check `[]` and
 * `{}` (or `["a"]` and `{ 0: "a" }`) would compare equal, because both expose
 * the same `Object.keys`.
 *
 * @param a - Left-hand value.
 * @param b - Right-hand value.
 * @returns true when both values have the same shape and content.
 */
function scalarsEqual(a: unknown, b: unknown): boolean {
  if (a === b) return true;
  if (a === null || b === null) return false;
  if (typeof a !== "object" || typeof b !== "object") return false;

  const aIsArray = Array.isArray(a);
  const bIsArray = Array.isArray(b);
  // Container kinds must agree before any key-wise comparison.
  if (aIsArray !== bIsArray) return false;

  if (Array.isArray(a) && Array.isArray(b)) {
    if (a.length !== b.length) return false;
    return a.every((item, i) => scalarsEqual(item, b[i]));
  }

  // Exact-shape match on nested objects; key order is irrelevant.
  const left = a as Record<string, unknown>;
  const right = b as Record<string, unknown>;
  const leftKeys = Object.keys(left).sort();
  const rightKeys = Object.keys(right).sort();
  if (leftKeys.length !== rightKeys.length) return false;
  if (leftKeys.some((key, i) => key !== rightKeys[i])) return false;
  return leftKeys.every((key) => scalarsEqual(left[key], right[key]));
}

// ----------------------------------------------------------------------------
// Set diff (arrays of scalars)
// ----------------------------------------------------------------------------

/**
 * Diff two arrays of scalars as sets: member order and multiplicity are
 * ignored, and membership is decided by each item's `stableJson` form.
 *
 * @param path - Field path reported on the resulting node.
 * @param a - Left-hand members.
 * @param b - Right-hand members.
 * @param ea - Evidence for the left list, if any.
 * @param eb - Evidence for the right list, if any.
 * @returns A set node listing members only on the right (`added`) and only on
 *   the left (`removed`); status is "changed" when either list is non-empty.
 */
function diffScalarSet(
  path: string,
  a: unknown[],
  b: unknown[],
  ea: Evidence | null,
  eb: Evidence | null
): SetDiff {
  const leftKeys = new Set(a.map((item) => stableJson(item)));
  const rightKeys = new Set(b.map((item) => stableJson(item)));
  const added = b.filter((item) => !leftKeys.has(stableJson(item)));
  const removed = a.filter((item) => !rightKeys.has(stableJson(item)));
  const status: DiffStatus = added.length > 0 || removed.length > 0 ? "changed" : "unchanged";
  return {
    kind: "set",
    path,
    status,
    left: a,
    right: b,
    added,
    removed,
    evidenceLeft: ea,
    evidenceRight: eb,
  };
}

// ----------------------------------------------------------------------------
// Row diff (arrays of records keyed by slug/id/sku/name)
// ----------------------------------------------------------------------------

/** Candidate identity fields for record rows, in order of preference. */
const ROW_KEY_PREFERENCE = ["slug", "id", "sku", "name", "code"];
string, a: unknown[], b: unknown[]): DiffNode { + const keyField = pickRowKeyField(a, b); + const aMap = indexRows(a, keyField); + const bMap = indexRows(b, keyField); + const seen = new Set(); + const ordered: string[] = []; + for (const k of aMap.keys()) { ordered.push(k); seen.add(k); } + for (const k of bMap.keys()) { if (!seen.has(k)) ordered.push(k); } + + const rowDiffs: DiffNode[] = []; + for (const rowKey of ordered) { + const left = aMap.get(rowKey) ?? null; + const right = bMap.get(rowKey) ?? null; + rowDiffs.push(diffSingleRow(`${path}[${rowKey}]`, rowKey, left, right)); + } + + // Aggregate into one parent SetDiff-like wrapper using the row kind itself + // so the consumer can render groups. We return the parent as a synthetic + // "row" entry with status='changed' iff any child changed. + const anyDelta = rowDiffs.some((r) => r.status !== "unchanged"); + return { + kind: "row", + path, + status: anyDelta ? "changed" : "unchanged", + rowKey: keyField ?? "(positional)", + fields: rowDiffs, + leftRecord: null, + rightRecord: null, + }; +} + +function diffSingleRow( + path: string, + rowKey: string, + left: Record | null, + right: Record | null +): RowDiff { + if (left === null && right !== null) { + return { + kind: "row", + path, + status: "added", + rowKey, + fields: [], + leftRecord: null, + rightRecord: right, + }; + } + if (right === null && left !== null) { + return { + kind: "row", + path, + status: "removed", + rowKey, + fields: [], + leftRecord: left, + rightRecord: null, + }; + } + if (!left || !right) { + return { + kind: "row", path, status: "unchanged", rowKey, fields: [], leftRecord: null, rightRecord: null + }; + } + const keys = unionOrderedKeys(left, right); + const fieldDeltas: DiffNode[] = []; + for (const k of keys) { + const child = diffField(`${path}.${k}`, left[k], right[k]); + if (child) fieldDeltas.push(child); + } + const anyDelta = fieldDeltas.some((f) => f.status !== "unchanged"); + return { + kind: "row", + path, + status: 
/**
 * Choose the field used to match records across the two sides.
 *
 * Rows are scanned left side first, then right. The first row that is an
 * object and owns at least one candidate from `ROW_KEY_PREFERENCE` decides,
 * with the preference order applied within that row. Scanning past the first
 * element means a keyless or non-object leading row no longer forces
 * positional matching for the whole list.
 *
 * @param a - Left-hand rows.
 * @param b - Right-hand rows.
 * @returns The chosen key field, or null when no row exposes a candidate
 *   (callers then fall back to positional keys).
 */
function pickRowKeyField(a: unknown[], b: unknown[]): string | null {
  for (const row of [...a, ...b]) {
    if (row === null || typeof row !== "object") continue;
    const ownKeys = Object.keys(row as object);
    const match = ROW_KEY_PREFERENCE.find((candidate) => ownKeys.includes(candidate));
    if (match !== undefined) return match;
  }
  return null;
}

/**
 * Index record rows by their identity key, preserving input order.
 *
 * The key is the row's `keyField` value when it is a non-empty string or a
 * number; otherwise the positional fallback `#<index>` is used. Non-object
 * rows are skipped.
 *
 * Rows whose key is already taken are kept under `<key>#2`, `<key>#3`, …
 * rather than being discarded, so the n-th duplicate on the left pairs with
 * the n-th duplicate on the right and its changes still surface in the diff.
 *
 * @param rows - Raw list entries.
 * @param keyField - Field chosen by `pickRowKeyField`, or null for positional keys.
 * @returns Insertion-ordered map from row key to record.
 */
function indexRows(
  rows: unknown[],
  keyField: string | null
): Map<string, Record<string, unknown>> {
  const out = new Map<string, Record<string, unknown>>();
  rows.forEach((row, idx) => {
    if (!row || typeof row !== "object") return;
    const record = row as Record<string, unknown>;
    const rawKey = keyField ? record[keyField] : null;

    let base: string;
    if (typeof rawKey === "string" && rawKey.length > 0) {
      base = rawKey;
    } else if (typeof rawKey === "number") {
      base = String(rawKey);
    } else {
      base = `#${idx}`;
    }

    // Disambiguate collisions by occurrence count instead of dropping the row.
    let key = base;
    for (let occurrence = 2; out.has(key); occurrence++) {
      key = `${base}#${occurrence}`;
    }
    out.set(key, record);
  });
  return out;
}
/**
 * Type guard for `{ value, ... }` blocks: a non-null, non-array object that
 * exposes a `value` property.
 */
function isValueWrapper(v: unknown): v is { value: unknown } {
  return (
    v !== null &&
    typeof v === "object" &&
    !Array.isArray(v) &&
    "value" in (v as object)
  );
}

/**
 * Read the `evidence` property of a value.
 *
 * @returns The evidence object, or null when `v` is not an object or its
 *   `evidence` is missing or not an object.
 */
function evidenceOf(v: unknown): Evidence | null {
  if (v === null || typeof v !== "object") return null;
  const evidence = (v as { evidence?: unknown }).evidence;
  if (!evidence || typeof evidence !== "object") return null;
  return evidence as Evidence;
}

/**
 * Extract the array carried by a field, whether it is a bare array or a
 * `{ value: [...] }` wrapper.
 *
 * @returns The array, or null when the field holds no array.
 */
function arrayOfWrapperValue(v: unknown): unknown[] | null {
  if (Array.isArray(v)) return v;
  if (isValueWrapper(v) && Array.isArray(v.value)) return v.value;
  return null;
}

/**
 * Decide whether an array holds records (objects) rather than scalars.
 * Only the first element is inspected; an empty array is not a record list.
 */
function looksLikeRecordList(arr: unknown[]): boolean {
  if (arr.length === 0) return false;
  const first = arr[0];
  return first !== null && typeof first === "object" && !Array.isArray(first);
}

/**
 * Return the payload of a field that may be either a raw value or a
 * `{ value }` wrapper.
 *
 * @typeParam T - Expected payload type; defaults to `string`.
 * @returns The payload, or null when the field (or the wrapped value) is
 *   null or undefined, so the result always honours `T | null`.
 */
function unwrap<T = string>(field: unknown): T | null {
  if (field == null) return null;
  if (typeof field === "object" && "value" in (field as object)) {
    return (field as { value?: T | null }).value ?? null;
  }
  return field as T;
}

/**
 * Serialise a JSON-like value to a canonical string: object keys are sorted,
 * so two structurally equal values always produce the same text.
 *
 * `JSON.stringify` returns `undefined` (not a string) for `undefined`,
 * functions and symbols. Those fall back to `String(v)`, so the result is
 * always a string and `[undefined]` no longer collides with `[]`.
 *
 * @param v - Value to serialise.
 * @returns Canonical string usable as a Set/Map key.
 */
function stableJson(v: unknown): string {
  if (v === null || typeof v !== "object") {
    return JSON.stringify(v) ?? String(v);
  }
  if (Array.isArray(v)) {
    return `[${v.map((item) => stableJson(item)).join(",")}]`;
  }
  const record = v as Record<string, unknown>;
  const body = Object.keys(record)
    .sort()
    .map((key) => `${JSON.stringify(key)}:${stableJson(record[key])}`)
    .join(",");
  return `{${body}}`;
}
+ for (const child of f.fields) { + if (child.status === "changed" || child.status === "added" || child.status === "removed") { + s[child.status]++; + } + } + } + } + return s; +} diff --git a/tests/unit/extraction-diff.test.ts b/tests/unit/extraction-diff.test.ts new file mode 100644 index 0000000..416ac59 --- /dev/null +++ b/tests/unit/extraction-diff.test.ts @@ -0,0 +1,227 @@ +import { describe, it, expect } from "vitest"; +import { diffExtractions } from "@/lib/pipeline/extraction-diff"; +import type { Extraction } from "@/lib/extractions"; + +function makeExtraction(overrides: Partial = {}): Extraction { + return { + vendor: "Dell", + category: "server", + product_line: "poweredge", + slug: "r770", + model: "PowerEdge R770", + description: "test", + overlays: [], + extraction_metadata: { schema_version: "v1", extracted_at: "2026-05-01T00:00:00Z" }, + sources: [], + ...overrides, + }; +} + +function scalar(value: unknown, confidence = 0.9) { + return { + value, + evidence: { + source: "source/spec.md", + anchor: "anchor", + page: 1, + quote: "q", + confidence, + }, + }; +} + +describe("diffExtractions — identity & summary", () => { + it("returns zero non-unchanged when both trees are identical", () => { + const a = makeExtraction({ + server_type: scalar("general-purpose"), + rack_units: scalar(2), + } as Partial); + const b = makeExtraction({ + server_type: scalar("general-purpose"), + rack_units: scalar(2), + } as Partial); + const diff = diffExtractions(a, b); + expect(diff.summary.changed).toBe(0); + expect(diff.summary.added).toBe(0); + expect(diff.summary.removed).toBe(0); + expect(diff.summary.unchanged).toBeGreaterThan(0); + }); + + it("captures top-level identity on both sides", () => { + const a = makeExtraction({ slug: "left-thing", model: "Left" }); + const b = makeExtraction({ slug: "right-thing", model: "Right" }); + const diff = diffExtractions(a, b); + expect(diff.identityLeft.slug).toBe("left-thing"); + 
expect(diff.identityRight.slug).toBe("right-thing"); + expect(diff.identityLeft.schema_version).toBe("v1"); + }); + + it("excludes identity keys from field rows", () => { + const a = makeExtraction({ server_type: scalar("a") } as Partial); + const b = makeExtraction({ server_type: scalar("b") } as Partial); + const diff = diffExtractions(a, b); + const paths = diff.fields.map((f) => f.path); + expect(paths).not.toContain("vendor"); + expect(paths).not.toContain("extraction_metadata"); + expect(paths).not.toContain("slug"); + }); +}); + +describe("diffExtractions — scalar fields", () => { + it("flags a changed scalar with both evidence blocks preserved", () => { + const a = makeExtraction({ + server_type: scalar("general-purpose", 0.95), + } as Partial); + const b = makeExtraction({ + server_type: scalar("ai-optimized", 0.7), + } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "server_type"); + expect(row).toBeDefined(); + expect(row?.status).toBe("changed"); + expect(row?.kind).toBe("scalar"); + if (row?.kind === "scalar") { + expect(row.left).toBe("general-purpose"); + expect(row.right).toBe("ai-optimized"); + expect(row.evidenceLeft?.confidence).toBe(0.95); + expect(row.evidenceRight?.confidence).toBe(0.7); + expect(row.confidenceDelta).toBeCloseTo(-0.25, 2); + } + }); + + it("treats null-equal-to-null as unchanged", () => { + const a = makeExtraction({ eol_date: scalar(null) } as Partial); + const b = makeExtraction({ eol_date: scalar(null) } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "eol_date"); + expect(row?.status).toBe("unchanged"); + }); + + it("flags as added when field is only present on the right", () => { + const a = makeExtraction(); + const b = makeExtraction({ generation: scalar("17G") } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "generation"); + expect(row?.status).toBe("added"); + if 
(row?.kind === "scalar") { + expect(row.right).toBe("17G"); + expect(row.left).toBeUndefined(); + } + }); + + it("flags as removed when field is only present on the left", () => { + const a = makeExtraction({ generation: scalar("16G") } as Partial); + const b = makeExtraction(); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "generation"); + expect(row?.status).toBe("removed"); + if (row?.kind === "scalar") { + expect(row.left).toBe("16G"); + expect(row.right).toBeUndefined(); + } + }); +}); + +describe("diffExtractions — scalar sets", () => { + it("detects added/removed members in a tag list", () => { + const a = makeExtraction({ + workload_tags: scalar(["virtualization", "ai-inference"]), + } as Partial); + const b = makeExtraction({ + workload_tags: scalar(["virtualization", "database", "hyperscale"]), + } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "workload_tags"); + expect(row?.kind).toBe("set"); + if (row?.kind === "set") { + expect(row.added.sort()).toEqual(["database", "hyperscale"].sort()); + expect(row.removed).toEqual(["ai-inference"]); + expect(row.status).toBe("changed"); + } + }); + + it("treats reorder of the same members as unchanged", () => { + const a = makeExtraction({ + workload_tags: scalar(["a", "b", "c"]), + } as Partial); + const b = makeExtraction({ + workload_tags: scalar(["c", "a", "b"]), + } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "workload_tags"); + expect(row?.status).toBe("unchanged"); + }); +}); + +describe("diffExtractions — keyed row lists", () => { + it("keys by slug and surfaces per-row deltas", () => { + const a = makeExtraction({ + cpu_skus: [ + { slug: "xeon-1", value: { cores: 32 } }, + { slug: "xeon-2", value: { cores: 64 } }, + ], + } as unknown as Partial); + const b = makeExtraction({ + cpu_skus: [ + { slug: "xeon-1", value: { cores: 32 } }, + { slug: "xeon-2", value: 
{ cores: 80 } }, + { slug: "xeon-3", value: { cores: 96 } }, + ], + } as unknown as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "cpu_skus"); + expect(row?.kind).toBe("row"); + if (row?.kind === "row") { + expect(row.status).toBe("changed"); + expect(row.rowKey).toBe("slug"); + const added = row.fields.find((f) => f.status === "added"); + const changed = row.fields.find((f) => f.status === "changed"); + expect(added?.path).toContain("xeon-3"); + expect(changed?.path).toContain("xeon-2"); + } + }); + + it("falls back to positional keys when no stable key field exists", () => { + const a = makeExtraction({ + arr: [{ foo: 1 }, { foo: 2 }], + } as unknown as Partial); + const b = makeExtraction({ + arr: [{ foo: 1 }, { foo: 3 }], + } as unknown as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "arr"); + expect(row?.kind).toBe("row"); + if (row?.kind === "row") { + expect(row.rowKey).toBe("(positional)"); + const changed = row.fields.find((f) => f.status === "changed"); + expect(changed).toBeDefined(); + } + }); +}); + +describe("diffExtractions — confidence delta", () => { + it("computes a numeric delta only when both sides supply confidence", () => { + const a = makeExtraction({ x: scalar("v", 0.4) } as Partial); + const b = makeExtraction({ x: scalar("v", 0.9) } as Partial); + const diff = diffExtractions(a, b); + const row = diff.fields.find((f) => f.path === "x"); + if (row?.kind === "scalar") { + expect(row.status).toBe("unchanged"); // same value, just confidence shift + expect(row.confidenceDelta).toBeCloseTo(0.5, 2); + } + }); + + it("returns null delta when one side has no confidence", () => { + const a = makeExtraction({ + x: { value: "v", evidence: { source: "s" } }, + } as Partial); + const b = makeExtraction({ + x: { value: "v", evidence: { source: "s", confidence: 0.9 } }, + } as Partial); + const diff = diffExtractions(a, b); + const row = 
diff.fields.find((f) => f.path === "x"); + if (row?.kind === "scalar") { + expect(row.confidenceDelta).toBeNull(); + } + }); +});